From 029d7ec3e937fe302d58b393c422195e5a2adc1d Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Tue, 8 Jul 2025 18:18:55 +0000 Subject: [PATCH] [dev.simd] cmd/compile, simd: rename Masked$OP to $(OP)Masked. This CL is generated by CL 686575. Change-Id: I1483189a1ae9bed51446fd69daab3f7b128549ae Reviewed-on: https://go-review.googlesource.com/c/go/+/686516 Reviewed-by: David Chase TryBot-Bypass: David Chase --- src/cmd/compile/internal/amd64/simdssa.go | 92 +- .../compile/internal/ssa/_gen/simdAMD64.rules | 1530 +- .../compile/internal/ssa/_gen/simdAMD64ops.go | 718 +- .../internal/ssa/_gen/simdgenericOps.go | 1530 +- src/cmd/compile/internal/ssa/opGen.go | 15092 ++++---- src/cmd/compile/internal/ssa/rewriteAMD64.go | 31352 ++++++++-------- .../compile/internal/ssagen/simdintrinsics.go | 1530 +- src/simd/ops_amd64.go | 8108 ++-- src/simd/simd_test.go | 6 +- src/simd/simd_wrapped_test.go | 2578 +- 10 files changed, 31268 insertions(+), 31268 deletions(-) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 2266f8d7ef..50339bf202 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -425,12 +425,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMINUQMasked128, ssa.OpAMD64VPMINUQMasked256, ssa.OpAMD64VPMINUQMasked512, - ssa.OpAMD64VMULPSMasked128, - ssa.OpAMD64VMULPSMasked256, - ssa.OpAMD64VMULPSMasked512, - ssa.OpAMD64VMULPDMasked128, - ssa.OpAMD64VMULPDMasked256, - ssa.OpAMD64VMULPDMasked512, ssa.OpAMD64VSCALEFPSMasked128, ssa.OpAMD64VSCALEFPSMasked256, ssa.OpAMD64VSCALEFPSMasked512, @@ -458,6 +452,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMULLQMasked128, ssa.OpAMD64VPMULLQMasked256, ssa.OpAMD64VPMULLQMasked512, + ssa.OpAMD64VMULPSMasked128, + ssa.OpAMD64VMULPSMasked256, + ssa.OpAMD64VMULPSMasked512, + ssa.OpAMD64VMULPDMasked128, + ssa.OpAMD64VMULPDMasked256, + ssa.OpAMD64VMULPDMasked512, ssa.OpAMD64VPORDMasked128, ssa.OpAMD64VPORDMasked256, ssa.OpAMD64VPORDMasked512, @@ -888,12 +888,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPEXTRQ128: p = simdFpgpImm8(s, v) - case ssa.OpAMD64VGF2P8AFFINEQBMasked128, - ssa.OpAMD64VGF2P8AFFINEQBMasked256, - ssa.OpAMD64VGF2P8AFFINEQBMasked512, - ssa.OpAMD64VGF2P8AFFINEINVQBMasked128, + case ssa.OpAMD64VGF2P8AFFINEINVQBMasked128, ssa.OpAMD64VGF2P8AFFINEINVQBMasked256, ssa.OpAMD64VGF2P8AFFINEINVQBMasked512, + ssa.OpAMD64VGF2P8AFFINEQBMasked128, + ssa.OpAMD64VGF2P8AFFINEQBMasked256, + ssa.OpAMD64VGF2P8AFFINEQBMasked512, ssa.OpAMD64VPSHLDWMasked128, ssa.OpAMD64VPSHLDWMasked256, ssa.OpAMD64VPSHLDWMasked512, @@ -1017,12 +1017,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VFMSUBADD213PDMasked128, ssa.OpAMD64VFMSUBADD213PDMasked256, ssa.OpAMD64VFMSUBADD213PDMasked512, - ssa.OpAMD64VGF2P8AFFINEQBMasked128, - ssa.OpAMD64VGF2P8AFFINEQBMasked256, - ssa.OpAMD64VGF2P8AFFINEQBMasked512, ssa.OpAMD64VGF2P8AFFINEINVQBMasked128, ssa.OpAMD64VGF2P8AFFINEINVQBMasked256, ssa.OpAMD64VGF2P8AFFINEINVQBMasked512, + ssa.OpAMD64VGF2P8AFFINEQBMasked128, + ssa.OpAMD64VGF2P8AFFINEQBMasked256, + ssa.OpAMD64VGF2P8AFFINEQBMasked512, ssa.OpAMD64VGF2P8MULBMasked128, ssa.OpAMD64VGF2P8MULBMasked256, ssa.OpAMD64VGF2P8MULBMasked512, @@ -1086,12 +1086,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMINUQMasked128, ssa.OpAMD64VPMINUQMasked256, ssa.OpAMD64VPMINUQMasked512, - ssa.OpAMD64VMULPSMasked128, - ssa.OpAMD64VMULPSMasked256, - 
ssa.OpAMD64VMULPSMasked512, - ssa.OpAMD64VMULPDMasked128, - ssa.OpAMD64VMULPDMasked256, - ssa.OpAMD64VMULPDMasked512, ssa.OpAMD64VSCALEFPSMasked128, ssa.OpAMD64VSCALEFPSMasked256, ssa.OpAMD64VSCALEFPSMasked512, @@ -1119,18 +1113,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMULLQMasked128, ssa.OpAMD64VPMULLQMasked256, ssa.OpAMD64VPMULLQMasked512, + ssa.OpAMD64VMULPSMasked128, + ssa.OpAMD64VMULPSMasked256, + ssa.OpAMD64VMULPSMasked512, + ssa.OpAMD64VMULPDMasked128, + ssa.OpAMD64VMULPDMasked256, + ssa.OpAMD64VMULPDMasked512, ssa.OpAMD64VPORDMasked128, ssa.OpAMD64VPORDMasked256, ssa.OpAMD64VPORDMasked512, ssa.OpAMD64VPORQMasked128, ssa.OpAMD64VPORQMasked256, ssa.OpAMD64VPORQMasked512, - ssa.OpAMD64VPMADDWDMasked128, - ssa.OpAMD64VPMADDWDMasked256, - ssa.OpAMD64VPMADDWDMasked512, ssa.OpAMD64VPDPWSSDMasked128, ssa.OpAMD64VPDPWSSDMasked256, ssa.OpAMD64VPDPWSSDMasked512, + ssa.OpAMD64VPMADDWDMasked128, + ssa.OpAMD64VPMADDWDMasked256, + ssa.OpAMD64VPMADDWDMasked512, ssa.OpAMD64VPOPCNTBMasked128, ssa.OpAMD64VPOPCNTBMasked256, ssa.OpAMD64VPOPCNTBMasked512, @@ -1188,9 +1188,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPDPBUSDSMasked128, ssa.OpAMD64VPDPBUSDSMasked256, ssa.OpAMD64VPDPBUSDSMasked512, - ssa.OpAMD64VPSLLQMasked128, - ssa.OpAMD64VPSLLQMasked256, - ssa.OpAMD64VPSLLQMasked512, ssa.OpAMD64VPSHLDWMasked128, ssa.OpAMD64VPSHLDWMasked256, ssa.OpAMD64VPSHLDWMasked512, @@ -1200,9 +1197,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHLDQMasked128, ssa.OpAMD64VPSHLDQMasked256, ssa.OpAMD64VPSHLDQMasked512, - ssa.OpAMD64VPSRLQMasked128, - ssa.OpAMD64VPSRLQMasked256, - ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSLLQMasked128, + ssa.OpAMD64VPSLLQMasked256, + ssa.OpAMD64VPSLLQMasked512, ssa.OpAMD64VPSHRDWMasked128, ssa.OpAMD64VPSHRDWMasked256, ssa.OpAMD64VPSHRDWMasked512, @@ -1212,18 +1209,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHRDQMasked128, ssa.OpAMD64VPSHRDQMasked256, ssa.OpAMD64VPSHRDQMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512, ssa.OpAMD64VPSRAQMasked128, ssa.OpAMD64VPSRAQMasked256, ssa.OpAMD64VPSRAQMasked512, - ssa.OpAMD64VPSLLVWMasked128, - ssa.OpAMD64VPSLLVWMasked256, - ssa.OpAMD64VPSLLVWMasked512, - ssa.OpAMD64VPSLLVDMasked128, - ssa.OpAMD64VPSLLVDMasked256, - ssa.OpAMD64VPSLLVDMasked512, - ssa.OpAMD64VPSLLVQMasked128, - ssa.OpAMD64VPSLLVQMasked256, - ssa.OpAMD64VPSLLVQMasked512, ssa.OpAMD64VPSHLDVWMasked128, ssa.OpAMD64VPSHLDVWMasked256, ssa.OpAMD64VPSHLDVWMasked512, @@ -1233,15 +1224,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHLDVQMasked128, ssa.OpAMD64VPSHLDVQMasked256, ssa.OpAMD64VPSHLDVQMasked512, - ssa.OpAMD64VPSRLVWMasked128, - ssa.OpAMD64VPSRLVWMasked256, - ssa.OpAMD64VPSRLVWMasked512, - ssa.OpAMD64VPSRLVDMasked128, - ssa.OpAMD64VPSRLVDMasked256, - ssa.OpAMD64VPSRLVDMasked512, - ssa.OpAMD64VPSRLVQMasked128, - ssa.OpAMD64VPSRLVQMasked256, - ssa.OpAMD64VPSRLVQMasked512, + ssa.OpAMD64VPSLLVWMasked128, + ssa.OpAMD64VPSLLVWMasked256, + ssa.OpAMD64VPSLLVWMasked512, + ssa.OpAMD64VPSLLVDMasked128, + ssa.OpAMD64VPSLLVDMasked256, + ssa.OpAMD64VPSLLVDMasked512, + ssa.OpAMD64VPSLLVQMasked128, + ssa.OpAMD64VPSLLVQMasked256, + ssa.OpAMD64VPSLLVQMasked512, ssa.OpAMD64VPSHRDVWMasked128, ssa.OpAMD64VPSHRDVWMasked256, ssa.OpAMD64VPSHRDVWMasked512, @@ -1251,6 +1242,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHRDVQMasked128, 
ssa.OpAMD64VPSHRDVQMasked256, ssa.OpAMD64VPSHRDVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VPSRAVWMasked128, ssa.OpAMD64VPSRAVWMasked256, ssa.OpAMD64VPSRAVWMasked512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index bcd227d4b9..7ea24fe95c 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -12,6 +12,18 @@ (AbsoluteInt64x2 ...) => (VPABSQ128 ...) (AbsoluteInt64x4 ...) => (VPABSQ256 ...) (AbsoluteInt64x8 ...) => (VPABSQ512 ...) +(AbsoluteMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM mask)) +(AbsoluteMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM mask)) +(AbsoluteMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM mask)) +(AbsoluteMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM mask)) +(AbsoluteMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM mask)) +(AbsoluteMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM mask)) +(AbsoluteMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM mask)) +(AbsoluteMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM mask)) +(AbsoluteMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM mask)) +(AbsoluteMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM mask)) +(AbsoluteMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM mask)) +(AbsoluteMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM mask)) (AddFloat32x4 ...) => (VADDPS128 ...) (AddFloat32x8 ...) => (VADDPS256 ...) (AddFloat32x16 ...) => (VADDPS512 ...) @@ -42,6 +54,36 @@ (AddUint64x2 ...) => (VPADDQ128 ...) (AddUint64x4 ...) => (VPADDQ256 ...) (AddUint64x8 ...) => (VPADDQ512 ...) 
+(AddMaskedFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) +(AddMaskedFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) +(AddMaskedFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) +(AddMaskedFloat64x2 x y mask) => (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) +(AddMaskedFloat64x4 x y mask) => (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) +(AddMaskedFloat64x8 x y mask) => (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) +(AddMaskedInt8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) +(AddMaskedInt8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) +(AddMaskedInt8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) +(AddMaskedInt16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) +(AddMaskedInt16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) +(AddMaskedInt16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) +(AddMaskedInt32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) +(AddMaskedInt32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) +(AddMaskedInt32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) +(AddMaskedInt64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) +(AddMaskedInt64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) +(AddMaskedInt64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) +(AddMaskedUint8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) +(AddMaskedUint8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) +(AddMaskedUint8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) +(AddMaskedUint16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) +(AddMaskedUint16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) +(AddMaskedUint16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) +(AddMaskedUint32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) +(AddMaskedUint32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) +(AddMaskedUint32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) +(AddMaskedUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) +(AddMaskedUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) +(AddMaskedUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) (AddSubFloat32x4 ...) => (VADDSUBPS128 ...) (AddSubFloat32x8 ...) => (VADDSUBPS256 ...) (AddSubFloat64x2 ...) => (VADDSUBPD128 ...) @@ -66,6 +108,18 @@ (AndUint64x2 ...) => (VPAND128 ...) (AndUint64x4 ...) => (VPAND256 ...) (AndUint64x8 ...) => (VPANDQ512 ...) 
+(AndMaskedInt32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) +(AndMaskedInt32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) +(AndMaskedInt32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) +(AndMaskedInt64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) +(AndMaskedInt64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) +(AndMaskedInt64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) +(AndMaskedUint32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) +(AndMaskedUint32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) +(AndMaskedUint32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) +(AndMaskedUint64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) +(AndMaskedUint64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) +(AndMaskedUint64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) (AndNotInt8x16 ...) => (VPANDN128 ...) (AndNotInt8x32 ...) => (VPANDN256 ...) (AndNotInt16x8 ...) => (VPANDN128 ...) @@ -86,24 +140,54 @@ (AndNotUint64x2 ...) => (VPANDN128 ...) (AndNotUint64x4 ...) => (VPANDN256 ...) (AndNotUint64x8 ...) => (VPANDNQ512 ...) +(AndNotMaskedInt32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) +(AndNotMaskedInt32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) +(AndNotMaskedInt32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) +(AndNotMaskedInt64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) +(AndNotMaskedInt64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) +(AndNotMaskedInt64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) +(AndNotMaskedUint32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) +(AndNotMaskedUint32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) +(AndNotMaskedUint32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) +(AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) +(AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) +(AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) (ApproximateReciprocalFloat32x4 ...) => (VRCP14PS128 ...) (ApproximateReciprocalFloat32x8 ...) => (VRCP14PS256 ...) (ApproximateReciprocalFloat32x16 ...) => (VRCP14PS512 ...) (ApproximateReciprocalFloat64x2 ...) => (VRCP14PD128 ...) (ApproximateReciprocalFloat64x4 ...) => (VRCP14PD256 ...) (ApproximateReciprocalFloat64x8 ...) => (VRCP14PD512 ...) +(ApproximateReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) +(ApproximateReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) +(ApproximateReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) +(ApproximateReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) +(ApproximateReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) +(ApproximateReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) (ApproximateReciprocalOfSqrtFloat32x4 ...) => (VRSQRTPS128 ...) (ApproximateReciprocalOfSqrtFloat32x8 ...) => (VRSQRTPS256 ...) (ApproximateReciprocalOfSqrtFloat32x16 ...) => (VRSQRT14PS512 ...) (ApproximateReciprocalOfSqrtFloat64x2 ...) => (VRSQRT14PD128 ...) (ApproximateReciprocalOfSqrtFloat64x4 ...) => (VRSQRT14PD256 ...) (ApproximateReciprocalOfSqrtFloat64x8 ...) => (VRSQRT14PD512 ...) 
+(ApproximateReciprocalOfSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) +(ApproximateReciprocalOfSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) +(ApproximateReciprocalOfSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) +(ApproximateReciprocalOfSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM mask)) +(ApproximateReciprocalOfSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) +(ApproximateReciprocalOfSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) (AverageUint8x16 ...) => (VPAVGB128 ...) (AverageUint8x32 ...) => (VPAVGB256 ...) (AverageUint8x64 ...) => (VPAVGB512 ...) (AverageUint16x8 ...) => (VPAVGW128 ...) (AverageUint16x16 ...) => (VPAVGW256 ...) (AverageUint16x32 ...) => (VPAVGW512 ...) +(AverageMaskedUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) +(AverageMaskedUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) +(AverageMaskedUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) +(AverageMaskedUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) +(AverageMaskedUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) +(AverageMaskedUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) (CeilFloat32x4 x) => (VROUNDPS128 [2] x) (CeilFloat32x8 x) => (VROUNDPS256 [2] x) (CeilFloat64x2 x) => (VROUNDPD128 [2] x) @@ -114,36 +198,72 @@ (CeilWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x) (CeilWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x) (CeilWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x) +(CeilWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) +(CeilWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) +(CeilWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) +(CeilWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) +(CeilWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) +(CeilWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) (DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x) (DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x) (DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x) (DiffWithCeilWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x) (DiffWithCeilWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x) (DiffWithCeilWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x) +(DiffWithCeilWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) +(DiffWithCeilWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) +(DiffWithCeilWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) +(DiffWithCeilWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) +(DiffWithCeilWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) +(DiffWithCeilWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) (DiffWithFloorWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x) (DiffWithFloorWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x) 
(DiffWithFloorWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x) (DiffWithFloorWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x) (DiffWithFloorWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x) (DiffWithFloorWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x) +(DiffWithFloorWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) +(DiffWithFloorWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) +(DiffWithFloorWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) +(DiffWithFloorWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) +(DiffWithFloorWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) +(DiffWithFloorWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) (DiffWithRoundWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x) (DiffWithRoundWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x) (DiffWithRoundWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x) (DiffWithRoundWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x) (DiffWithRoundWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x) (DiffWithRoundWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x) +(DiffWithRoundWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) +(DiffWithRoundWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) +(DiffWithRoundWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) +(DiffWithRoundWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) +(DiffWithRoundWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) +(DiffWithRoundWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) (DiffWithTruncWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x) (DiffWithTruncWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x) (DiffWithTruncWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x) (DiffWithTruncWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x) (DiffWithTruncWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x) (DiffWithTruncWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x) +(DiffWithTruncWithPrecisionMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) +(DiffWithTruncWithPrecisionMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) +(DiffWithTruncWithPrecisionMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) +(DiffWithTruncWithPrecisionMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) +(DiffWithTruncWithPrecisionMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) +(DiffWithTruncWithPrecisionMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) (DivFloat32x4 ...) => (VDIVPS128 ...) (DivFloat32x8 ...) => (VDIVPS256 ...) (DivFloat32x16 ...) => (VDIVPS512 ...) (DivFloat64x2 ...) => (VDIVPD128 ...) (DivFloat64x4 ...) => (VDIVPD256 ...) (DivFloat64x8 ...) => (VDIVPD512 ...) 
+(DivMaskedFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) +(DivMaskedFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) +(DivMaskedFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) +(DivMaskedFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) +(DivMaskedFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) +(DivMaskedFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) (DotProdBroadcastFloat64x2 x y) => (VDPPD128 [127] x y) (EqualFloat32x4 x y) => (VCMPPS128 [0] x y) (EqualFloat32x8 x y) => (VCMPPS256 [0] x y) @@ -175,6 +295,36 @@ (EqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y)) (EqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y)) (EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y)) +(EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) +(EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) +(EqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) +(EqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) +(EqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) +(EqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) +(EqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) +(EqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) +(EqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) +(EqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) +(EqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) +(EqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) +(EqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) +(EqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) +(EqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) +(EqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) +(EqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) +(EqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) +(EqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) +(EqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) +(EqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) +(EqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) +(EqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) +(EqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) +(EqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) +(EqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) +(EqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 
(VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) +(EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) +(EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) +(EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) (FloorFloat32x4 x) => (VROUNDPS128 [1] x) (FloorFloat32x8 x) => (VROUNDPS256 [1] x) (FloorFloat64x2 x) => (VROUNDPD128 [1] x) @@ -185,33 +335,66 @@ (FloorWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x) (FloorWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x) (FloorWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x) +(FloorWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) +(FloorWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) +(FloorWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) +(FloorWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) +(FloorWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) +(FloorWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) (FusedMultiplyAddFloat32x4 ...) => (VFMADD213PS128 ...) (FusedMultiplyAddFloat32x8 ...) => (VFMADD213PS256 ...) (FusedMultiplyAddFloat32x16 ...) => (VFMADD213PS512 ...) (FusedMultiplyAddFloat64x2 ...) => (VFMADD213PD128 ...) (FusedMultiplyAddFloat64x4 ...) => (VFMADD213PD256 ...) (FusedMultiplyAddFloat64x8 ...) => (VFMADD213PD512 ...) +(FusedMultiplyAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) +(FusedMultiplyAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) +(FusedMultiplyAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) +(FusedMultiplyAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) +(FusedMultiplyAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) +(FusedMultiplyAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (FusedMultiplyAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...) (FusedMultiplyAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...) (FusedMultiplyAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...) (FusedMultiplyAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...) (FusedMultiplyAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...) (FusedMultiplyAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...) +(FusedMultiplyAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) +(FusedMultiplyAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) +(FusedMultiplyAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) +(FusedMultiplyAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) +(FusedMultiplyAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) +(FusedMultiplyAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (FusedMultiplySubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...) (FusedMultiplySubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...) (FusedMultiplySubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...) (FusedMultiplySubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...) 
(FusedMultiplySubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...) (FusedMultiplySubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...) +(FusedMultiplySubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) +(FusedMultiplySubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) +(FusedMultiplySubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) +(FusedMultiplySubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) +(FusedMultiplySubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) +(FusedMultiplySubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) (GaloisFieldAffineTransformUint8x16 [a] x y) => (VGF2P8AFFINEQB128 [a] x y) (GaloisFieldAffineTransformUint8x32 [a] x y) => (VGF2P8AFFINEQB256 [a] x y) (GaloisFieldAffineTransformUint8x64 [a] x y) => (VGF2P8AFFINEQB512 [a] x y) (GaloisFieldAffineTransformInversedUint8x16 [a] x y) => (VGF2P8AFFINEINVQB128 [a] x y) (GaloisFieldAffineTransformInversedUint8x32 [a] x y) => (VGF2P8AFFINEINVQB256 [a] x y) (GaloisFieldAffineTransformInversedUint8x64 [a] x y) => (VGF2P8AFFINEINVQB512 [a] x y) +(GaloisFieldAffineTransformInversedMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) +(GaloisFieldAffineTransformInversedMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) +(GaloisFieldAffineTransformInversedMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) +(GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) +(GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) +(GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) (GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...) (GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...) (GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...) 
+(GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) +(GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) +(GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) (Get128Float32x8 [a] x) => (VEXTRACTF128128 [a] x) (Get128Float64x4 [a] x) => (VEXTRACTF128128 [a] x) (Get128Int8x32 [a] x) => (VEXTRACTI128128 [a] x) @@ -290,12 +473,78 @@ (GreaterEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y)) (GreaterEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y)) (GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) +(GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) +(GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) +(GreaterEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM mask))) +(GreaterEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM mask))) +(GreaterEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM mask))) +(GreaterEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM mask))) +(GreaterEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM mask))) +(GreaterEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM mask))) +(GreaterEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM mask))) +(GreaterEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM mask))) +(GreaterEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM mask))) +(GreaterEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM mask))) +(GreaterEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM mask))) +(GreaterEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM mask))) +(GreaterEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM mask))) +(GreaterEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM mask))) +(GreaterEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM mask))) +(GreaterEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM mask))) +(GreaterEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM mask))) +(GreaterEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM mask))) +(GreaterEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM mask))) +(GreaterEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM mask))) +(GreaterEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM mask))) +(GreaterEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM mask))) +(GreaterEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM mask))) +(GreaterEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM mask))) 
+(GreaterEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM mask))) +(GreaterEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM mask))) +(GreaterEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM mask))) +(GreaterEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM mask))) +(GreaterMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM mask))) +(GreaterMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM mask))) +(GreaterMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM mask))) +(GreaterMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM mask))) +(GreaterMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM mask))) +(GreaterMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM mask))) +(GreaterMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM mask))) +(GreaterMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM mask))) +(GreaterMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM mask))) +(GreaterMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM mask))) +(GreaterMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM mask))) +(GreaterMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM mask))) +(GreaterMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM mask))) +(GreaterMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM mask))) +(GreaterMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM mask))) +(GreaterMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM mask))) +(GreaterMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM mask))) +(GreaterMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM mask))) +(GreaterMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM mask))) +(GreaterMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM mask))) +(GreaterMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM mask))) +(GreaterMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM mask))) +(GreaterMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM mask))) +(GreaterMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM mask))) +(GreaterMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM mask))) +(GreaterMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM mask))) +(GreaterMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM mask))) +(GreaterMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM mask))) +(GreaterMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM mask))) +(GreaterMaskedUint64x8 x y mask) => 
(VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM mask))) (IsNanFloat32x4 x y) => (VCMPPS128 [3] x y) (IsNanFloat32x8 x y) => (VCMPPS256 [3] x y) (IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) (IsNanFloat64x2 x y) => (VCMPPD128 [3] x y) (IsNanFloat64x4 x y) => (VCMPPD256 [3] x y) (IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) +(IsNanMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask))) +(IsNanMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask))) +(IsNanMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask))) +(IsNanMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask))) +(IsNanMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask))) +(IsNanMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask))) (LessFloat32x4 x y) => (VCMPPS128 [1] x y) (LessFloat32x8 x y) => (VCMPPS256 [1] x y) (LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) @@ -356,771 +605,66 @@ (LessEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) (LessEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) (LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) -(MaskedAbsoluteInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM mask)) -(MaskedAbsoluteInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM mask)) -(MaskedAbsoluteInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM mask)) -(MaskedAbsoluteInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM mask)) -(MaskedAbsoluteInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM mask)) -(MaskedAbsoluteInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM mask)) -(MaskedAbsoluteInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM mask)) -(MaskedAbsoluteInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM mask)) -(MaskedAbsoluteInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM mask)) -(MaskedAbsoluteInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM mask)) -(MaskedAbsoluteInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM mask)) -(MaskedAbsoluteInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedAddFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedAddFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAddFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAddFloat64x2 x y mask) => (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedAddFloat64x4 x y mask) => (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAddFloat64x8 x y mask) => (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAddInt8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedAddInt8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedAddInt8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedAddInt16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedAddInt16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedAddInt16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedAddInt32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedAddInt32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAddInt32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAddInt64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM 
mask)) -(MaskedAddInt64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAddInt64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAddUint8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedAddUint8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedAddUint8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedAddUint16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedAddUint16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedAddUint16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedAddUint32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedAddUint32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAddUint32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAddUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedAddUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAddUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndInt32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedAndInt32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAndInt32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAndInt64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedAndInt64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAndInt64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndUint32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedAndUint32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAndUint32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAndUint64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedAndUint64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAndUint64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndNotInt32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedAndNotInt32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAndNotInt32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAndNotInt64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedAndNotInt64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAndNotInt64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedAndNotUint32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedAndNotUint32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedAndNotUint32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedAndNotUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedAndNotUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedAndNotUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedApproximateReciprocalFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) -(MaskedApproximateReciprocalFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) -(MaskedApproximateReciprocalFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) -(MaskedApproximateReciprocalFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) -(MaskedApproximateReciprocalFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) -(MaskedApproximateReciprocalFloat64x8 x mask) => 
(VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedApproximateReciprocalOfSqrtFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) -(MaskedApproximateReciprocalOfSqrtFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) -(MaskedApproximateReciprocalOfSqrtFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) -(MaskedApproximateReciprocalOfSqrtFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM mask)) -(MaskedApproximateReciprocalOfSqrtFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) -(MaskedApproximateReciprocalOfSqrtFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedAverageUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedAverageUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedAverageUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedAverageUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedAverageUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedAverageUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedCeilWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) -(MaskedCeilWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) -(MaskedCeilWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) -(MaskedCeilWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) -(MaskedCeilWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) -(MaskedCeilWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithCeilWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) -(MaskedDiffWithCeilWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) -(MaskedDiffWithCeilWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) -(MaskedDiffWithCeilWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) -(MaskedDiffWithCeilWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) -(MaskedDiffWithCeilWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithFloorWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) -(MaskedDiffWithFloorWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) -(MaskedDiffWithFloorWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) -(MaskedDiffWithFloorWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) -(MaskedDiffWithFloorWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) -(MaskedDiffWithFloorWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithRoundWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) -(MaskedDiffWithRoundWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) -(MaskedDiffWithRoundWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) -(MaskedDiffWithRoundWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x 
(VPMOVVec64x2ToM mask)) -(MaskedDiffWithRoundWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) -(MaskedDiffWithRoundWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) -(MaskedDiffWithTruncWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) -(MaskedDiffWithTruncWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) -(MaskedDiffWithTruncWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) -(MaskedDiffWithTruncWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) -(MaskedDiffWithTruncWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) -(MaskedDiffWithTruncWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) -(MaskedDivFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedDivFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedDivFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedDivFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedDivFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedDivFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedEqualFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(MaskedEqualFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(MaskedEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(MaskedEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(MaskedEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(MaskedEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) -(MaskedEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) -(MaskedEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) -(MaskedEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) -(MaskedEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) -(MaskedEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) -(MaskedEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) -(MaskedEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(MaskedEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(MaskedEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(MaskedEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(MaskedEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(MaskedEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) -(MaskedEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) -(MaskedEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) -(MaskedEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) 
-(MaskedEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) -(MaskedEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) -(MaskedEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) -(MaskedEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) -(MaskedEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) -(MaskedEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) -(MaskedEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) -(MaskedEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) -(MaskedEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) -(MaskedFloorWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) -(MaskedFloorWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) -(MaskedFloorWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) -(MaskedFloorWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) -(MaskedFloorWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) -(MaskedFloorWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) -(MaskedFusedMultiplyAddFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedFusedMultiplyAddFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedFusedMultiplyAddFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedFusedMultiplyAddFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MaskedFusedMultiplyAddFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MaskedFusedMultiplyAddFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedFusedMultiplyAddSubFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedFusedMultiplyAddSubFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedFusedMultiplyAddSubFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedFusedMultiplyAddSubFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MaskedFusedMultiplyAddSubFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MaskedFusedMultiplyAddSubFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedFusedMultiplySubAddFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedFusedMultiplySubAddFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedFusedMultiplySubAddFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedFusedMultiplySubAddFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) -(MaskedFusedMultiplySubAddFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) -(MaskedFusedMultiplySubAddFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedGaloisFieldAffineTransformUint8x16 [a] x y mask) => (VGF2P8AFFINEQBMasked128 [a] x 
y (VPMOVVec8x16ToM mask)) -(MaskedGaloisFieldAffineTransformUint8x32 [a] x y mask) => (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) -(MaskedGaloisFieldAffineTransformUint8x64 [a] x y mask) => (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) -(MaskedGaloisFieldAffineTransformInversedUint8x16 [a] x y mask) => (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) -(MaskedGaloisFieldAffineTransformInversedUint8x32 [a] x y mask) => (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) -(MaskedGaloisFieldAffineTransformInversedUint8x64 [a] x y mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) -(MaskedGaloisFieldMulUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedGaloisFieldMulUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedGaloisFieldMulUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedGreaterFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM mask))) -(MaskedGreaterFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(MaskedGreaterFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(MaskedGreaterFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(MaskedGreaterFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(MaskedGreaterFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM mask))) -(MaskedGreaterInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM mask))) -(MaskedGreaterInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM mask))) -(MaskedGreaterInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM mask))) -(MaskedGreaterInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM mask))) -(MaskedGreaterInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM mask))) -(MaskedGreaterInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM mask))) -(MaskedGreaterInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM mask))) -(MaskedGreaterInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(MaskedGreaterInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(MaskedGreaterInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(MaskedGreaterInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(MaskedGreaterInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM mask))) -(MaskedGreaterUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM mask))) -(MaskedGreaterUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM mask))) -(MaskedGreaterUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM mask))) -(MaskedGreaterUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM mask))) -(MaskedGreaterUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM mask))) -(MaskedGreaterUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM mask))) -(MaskedGreaterUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y 
(VPMOVVec32x4ToM mask))) -(MaskedGreaterUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM mask))) -(MaskedGreaterUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM mask))) -(MaskedGreaterUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM mask))) -(MaskedGreaterUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM mask))) -(MaskedGreaterUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM mask))) -(MaskedGreaterEqualFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(MaskedGreaterEqualFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(MaskedGreaterEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM mask))) -(MaskedGreaterEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(MaskedGreaterEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(MaskedGreaterEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM mask))) -(MaskedGreaterEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM mask))) -(MaskedGreaterEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM mask))) -(MaskedGreaterEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM mask))) -(MaskedGreaterEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM mask))) -(MaskedGreaterEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM mask))) -(MaskedGreaterEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM mask))) -(MaskedGreaterEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(MaskedGreaterEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(MaskedGreaterEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM mask))) -(MaskedGreaterEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(MaskedGreaterEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(MaskedGreaterEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM mask))) -(MaskedGreaterEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM mask))) -(MaskedGreaterEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM mask))) -(MaskedGreaterEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM mask))) -(MaskedGreaterEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM mask))) -(MaskedGreaterEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM mask))) -(MaskedGreaterEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM mask))) -(MaskedGreaterEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM mask))) -(MaskedGreaterEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM mask))) -(MaskedGreaterEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM mask))) 
-(MaskedGreaterEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM mask))) -(MaskedGreaterEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM mask))) -(MaskedGreaterEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM mask))) -(MaskedIsNanFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask))) -(MaskedIsNanFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask))) -(MaskedIsNanFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask))) -(MaskedIsNanFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask))) -(MaskedIsNanFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask))) -(MaskedIsNanFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask))) -(MaskedLessFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(MaskedLessFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(MaskedLessFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(MaskedLessFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(MaskedLessFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask))) -(MaskedLessFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask))) -(MaskedLessInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask))) -(MaskedLessInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask))) -(MaskedLessInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask))) -(MaskedLessInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask))) -(MaskedLessInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask))) -(MaskedLessInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask))) -(MaskedLessInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(MaskedLessInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(MaskedLessInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(MaskedLessInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(MaskedLessInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask))) -(MaskedLessInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask))) -(MaskedLessUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask))) -(MaskedLessUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask))) -(MaskedLessUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask))) -(MaskedLessUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask))) -(MaskedLessUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask))) -(MaskedLessUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask))) -(MaskedLessUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask))) -(MaskedLessUint32x8 x y mask) => (VPMOVMToVec32x8 
(VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask))) -(MaskedLessUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask))) -(MaskedLessUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask))) -(MaskedLessUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM mask))) -(MaskedLessUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask))) -(MaskedLessEqualFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(MaskedLessEqualFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(MaskedLessEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(MaskedLessEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(MaskedLessEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask))) -(MaskedLessEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(MaskedLessEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM mask))) -(MaskedLessEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask))) -(MaskedLessEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask))) -(MaskedLessEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask))) -(MaskedLessEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask))) -(MaskedLessEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask))) -(MaskedLessEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(MaskedLessEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(MaskedLessEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(MaskedLessEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(MaskedLessEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask))) -(MaskedLessEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(MaskedLessEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask))) -(MaskedLessEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask))) -(MaskedLessEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask))) -(MaskedLessEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask))) -(MaskedLessEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask))) -(MaskedLessEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask))) -(MaskedLessEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask))) -(MaskedLessEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask))) -(MaskedLessEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask))) -(MaskedLessEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask))) -(MaskedLessEqualUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM 
mask))) -(MaskedLessEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask))) -(MaskedMaxFloat32x4 x y mask) => (VMAXPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMaxFloat32x8 x y mask) => (VMAXPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMaxFloat32x16 x y mask) => (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMaxFloat64x2 x y mask) => (VMAXPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMaxFloat64x4 x y mask) => (VMAXPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMaxFloat64x8 x y mask) => (VMAXPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMaxInt8x16 x y mask) => (VPMAXSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedMaxInt8x32 x y mask) => (VPMAXSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedMaxInt8x64 x y mask) => (VPMAXSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedMaxInt16x8 x y mask) => (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedMaxInt16x16 x y mask) => (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedMaxInt16x32 x y mask) => (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedMaxInt32x4 x y mask) => (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMaxInt32x8 x y mask) => (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMaxInt32x16 x y mask) => (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMaxInt64x2 x y mask) => (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMaxInt64x4 x y mask) => (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMaxInt64x8 x y mask) => (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMaxUint8x16 x y mask) => (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedMaxUint8x32 x y mask) => (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedMaxUint8x64 x y mask) => (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedMaxUint16x8 x y mask) => (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedMaxUint16x16 x y mask) => (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedMaxUint16x32 x y mask) => (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedMaxUint32x4 x y mask) => (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMaxUint32x8 x y mask) => (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMaxUint32x16 x y mask) => (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMaxUint64x2 x y mask) => (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMaxUint64x4 x y mask) => (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMaxUint64x8 x y mask) => (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMinFloat32x4 x y mask) => (VMINPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMinFloat32x8 x y mask) => (VMINPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMinFloat32x16 x y mask) => (VMINPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMinFloat64x2 x y mask) => (VMINPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMinFloat64x4 x y mask) => (VMINPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMinFloat64x8 x y mask) => (VMINPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMinInt8x16 x y mask) => (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedMinInt8x32 x y mask) => (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedMinInt8x64 x y mask) => (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedMinInt16x8 x y mask) => (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedMinInt16x16 x y mask) => (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedMinInt16x32 x y mask) => (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedMinInt32x4 x y mask) => (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMinInt32x8 x y mask) => 
(VPMINSDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMinInt32x16 x y mask) => (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMinInt64x2 x y mask) => (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMinInt64x4 x y mask) => (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMinInt64x8 x y mask) => (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMinUint8x16 x y mask) => (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedMinUint8x32 x y mask) => (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedMinUint8x64 x y mask) => (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedMinUint16x8 x y mask) => (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedMinUint16x16 x y mask) => (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedMinUint16x32 x y mask) => (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedMinUint32x4 x y mask) => (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMinUint32x8 x y mask) => (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMinUint32x16 x y mask) => (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMinUint64x2 x y mask) => (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMinUint64x4 x y mask) => (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMinUint64x8 x y mask) => (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMulFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMulFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMulFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMulFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMulFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMulFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMulByPowOf2Float32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMulByPowOf2Float32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMulByPowOf2Float32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMulByPowOf2Float64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMulByPowOf2Float64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMulByPowOf2Float64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMulEvenWidenInt64x2 x y mask) => (VPMULDQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMulEvenWidenInt64x4 x y mask) => (VPMULDQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMulEvenWidenInt64x8 x y mask) => (VPMULDQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMulEvenWidenUint64x2 x y mask) => (VPMULUDQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMulEvenWidenUint64x4 x y mask) => (VPMULUDQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMulEvenWidenUint64x8 x y mask) => (VPMULUDQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedMulHighInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedMulHighInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedMulHighInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedMulHighUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedMulHighUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedMulHighUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedMulLowInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedMulLowInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedMulLowInt16x32 x 
y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedMulLowInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedMulLowInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedMulLowInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedMulLowInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedMulLowInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedMulLowInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedNotEqualFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(MaskedNotEqualFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(MaskedNotEqualFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(MaskedNotEqualFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(MaskedNotEqualFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(MaskedNotEqualFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask))) -(MaskedNotEqualInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask))) -(MaskedNotEqualInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask))) -(MaskedNotEqualInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask))) -(MaskedNotEqualInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask))) -(MaskedNotEqualInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask))) -(MaskedNotEqualInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask))) -(MaskedNotEqualInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(MaskedNotEqualInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(MaskedNotEqualInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(MaskedNotEqualInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(MaskedNotEqualInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(MaskedNotEqualInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask))) -(MaskedNotEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask))) -(MaskedNotEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask))) -(MaskedNotEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask))) -(MaskedNotEqualUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask))) -(MaskedNotEqualUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask))) -(MaskedNotEqualUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM mask))) -(MaskedNotEqualUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM mask))) -(MaskedNotEqualUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask))) -(MaskedNotEqualUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask))) -(MaskedNotEqualUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask))) -(MaskedNotEqualUint64x4 x y mask) => 
(VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask))) -(MaskedNotEqualUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) -(MaskedOrInt32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedOrInt32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedOrInt32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedOrInt64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedOrInt64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedOrInt64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedOrUint32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedOrUint32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedOrUint32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedOrUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedOrUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedOrUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedPairDotProdInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedPairDotProdInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedPairDotProdInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedPairDotProdAccumulateInt32x4 x y z mask) => (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedPairDotProdAccumulateInt32x8 x y z mask) => (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedPopCountInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) -(MaskedPopCountInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) -(MaskedPopCountInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) -(MaskedPopCountInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) -(MaskedPopCountInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) -(MaskedPopCountInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) -(MaskedPopCountInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) -(MaskedPopCountInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) -(MaskedPopCountInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) -(MaskedPopCountInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) -(MaskedPopCountInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) -(MaskedPopCountInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedPopCountUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) -(MaskedPopCountUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) -(MaskedPopCountUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) -(MaskedPopCountUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) -(MaskedPopCountUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) -(MaskedPopCountUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) -(MaskedPopCountUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) -(MaskedPopCountUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) -(MaskedPopCountUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) -(MaskedPopCountUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) -(MaskedPopCountUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) -(MaskedPopCountUint64x8 x mask) => 
(VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedRotateAllLeftInt32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(MaskedRotateAllLeftInt32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(MaskedRotateAllLeftInt32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(MaskedRotateAllLeftInt64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(MaskedRotateAllLeftInt64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(MaskedRotateAllLeftInt64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) -(MaskedRotateAllLeftUint32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(MaskedRotateAllLeftUint32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(MaskedRotateAllLeftUint32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(MaskedRotateAllLeftUint64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(MaskedRotateAllLeftUint64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(MaskedRotateAllLeftUint64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) -(MaskedRotateAllRightInt32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(MaskedRotateAllRightInt32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(MaskedRotateAllRightInt32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(MaskedRotateAllRightInt64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(MaskedRotateAllRightInt64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(MaskedRotateAllRightInt64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) -(MaskedRotateAllRightUint32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) -(MaskedRotateAllRightUint32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) -(MaskedRotateAllRightUint32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) -(MaskedRotateAllRightUint64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) -(MaskedRotateAllRightUint64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) -(MaskedRotateAllRightUint64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) -(MaskedRotateLeftInt32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedRotateLeftInt32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedRotateLeftInt32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedRotateLeftInt64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedRotateLeftInt64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedRotateLeftInt64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedRotateLeftUint32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedRotateLeftUint32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedRotateLeftUint32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedRotateLeftUint64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedRotateLeftUint64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedRotateLeftUint64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedRotateRightInt32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedRotateRightInt32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedRotateRightInt32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) 
-(MaskedRotateRightInt64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedRotateRightInt64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedRotateRightInt64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedRotateRightUint32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedRotateRightUint32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedRotateRightUint32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedRotateRightUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedRotateRightUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedRotateRightUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedRoundWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) -(MaskedRoundWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) -(MaskedRoundWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) -(MaskedRoundWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) -(MaskedRoundWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) -(MaskedRoundWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) -(MaskedSaturatedAddInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedSaturatedAddInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedSaturatedAddInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedSaturatedAddInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedSaturatedAddInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedSaturatedAddInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedSaturatedAddUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedSaturatedAddUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedSaturatedAddUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedSaturatedAddUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedSaturatedAddUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedSaturatedAddUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedSaturatedPairDotProdAccumulateInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedSaturatedPairDotProdAccumulateInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedSaturatedPairDotProdAccumulateInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedSaturatedSubInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedSaturatedSubInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedSaturatedSubInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedSaturatedSubInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedSaturatedSubInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedSaturatedSubInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedSaturatedSubUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedSaturatedSubUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedSaturatedSubUint8x64 x y mask) => 
(VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedSaturatedSubUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedSaturatedSubUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedSaturatedSubUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedSaturatedUnsignedSignedPairDotProdUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedSaturatedUnsignedSignedPairDotProdUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedSaturatedUnsignedSignedPairDotProdUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedShiftAllLeftInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllLeftInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllLeftInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftAllLeftUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllLeftUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllLeftUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint32x16 [a] x y 
mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllLeftAndFillUpperFromUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(MaskedShiftAllRightInt64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllRightInt64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllRightInt64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftAllRightUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllRightUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllRightUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllRightAndFillUpperFromInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllRightAndFillUpperFromUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(MaskedShiftAllRightSignExtendedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftAllRightSignExtendedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftAllRightSignExtendedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftLeftInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedShiftLeftInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedShiftLeftInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedShiftLeftInt32x4 x y mask) => 
(VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedShiftLeftInt32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedShiftLeftInt32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedShiftLeftInt64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftLeftInt64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftLeftInt64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftLeftUint16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedShiftLeftUint16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedShiftLeftUint16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedShiftLeftUint32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedShiftLeftUint32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedShiftLeftUint32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedShiftLeftUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftLeftUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftLeftUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(MaskedShiftLeftAndFillUpperFromInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(MaskedShiftLeftAndFillUpperFromUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedShiftRightInt16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedShiftRightInt16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedShiftRightInt16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedShiftRightInt32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedShiftRightInt32x8 x y mask) => (VPSRLVDMasked256 x y 
(VPMOVVec32x8ToM mask)) -(MaskedShiftRightInt32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedShiftRightInt64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftRightInt64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftRightInt64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftRightUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedShiftRightUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedShiftRightUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedShiftRightUint32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedShiftRightUint32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedShiftRightUint32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedShiftRightUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftRightUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftRightUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftRightAndFillUpperFromInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(MaskedShiftRightAndFillUpperFromInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(MaskedShiftRightAndFillUpperFromInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(MaskedShiftRightAndFillUpperFromInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedShiftRightAndFillUpperFromInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedShiftRightAndFillUpperFromInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedShiftRightAndFillUpperFromInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(MaskedShiftRightAndFillUpperFromInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(MaskedShiftRightAndFillUpperFromInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedShiftRightAndFillUpperFromUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(MaskedShiftRightAndFillUpperFromUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(MaskedShiftRightAndFillUpperFromUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(MaskedShiftRightAndFillUpperFromUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedShiftRightAndFillUpperFromUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedShiftRightAndFillUpperFromUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedShiftRightAndFillUpperFromUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(MaskedShiftRightAndFillUpperFromUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(MaskedShiftRightAndFillUpperFromUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(MaskedShiftRightSignExtendedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedShiftRightSignExtendedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedShiftRightSignExtendedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedShiftRightSignExtendedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedShiftRightSignExtendedInt32x8 x y mask) => (VPSRAVDMasked256 x y 
(VPMOVVec32x8ToM mask)) -(MaskedShiftRightSignExtendedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedShiftRightSignExtendedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftRightSignExtendedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftRightSignExtendedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedShiftRightSignExtendedUint16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedShiftRightSignExtendedUint16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedShiftRightSignExtendedUint16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedShiftRightSignExtendedUint32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedShiftRightSignExtendedUint32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedShiftRightSignExtendedUint32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedShiftRightSignExtendedUint64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedShiftRightSignExtendedUint64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedShiftRightSignExtendedUint64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedSqrtFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) -(MaskedSqrtFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) -(MaskedSqrtFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) -(MaskedSqrtFloat64x2 x mask) => (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) -(MaskedSqrtFloat64x4 x mask) => (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) -(MaskedSqrtFloat64x8 x mask) => (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) -(MaskedSubFloat32x4 x y mask) => (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedSubFloat32x8 x y mask) => (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedSubFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedSubFloat64x2 x y mask) => (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedSubFloat64x4 x y mask) => (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedSubFloat64x8 x y mask) => (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedSubInt8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedSubInt8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedSubInt8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedSubInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedSubInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) -(MaskedSubInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedSubInt32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedSubInt32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedSubInt32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedSubInt64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedSubInt64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedSubInt64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedSubUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) -(MaskedSubUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) -(MaskedSubUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) -(MaskedSubUint16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) -(MaskedSubUint16x16 x y mask) => (VPSUBWMasked256 x y 
(VPMOVVec16x16ToM mask)) -(MaskedSubUint16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) -(MaskedSubUint32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedSubUint32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedSubUint32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedSubUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedSubUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedSubUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedTruncWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) -(MaskedTruncWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) -(MaskedTruncWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) -(MaskedTruncWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) -(MaskedTruncWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) -(MaskedTruncWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) -(MaskedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) -(MaskedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) -(MaskedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) -(MaskedXorInt32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedXorInt32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedXorInt32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedXorInt64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedXorInt64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedXorInt64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) -(MaskedXorUint32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) -(MaskedXorUint32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) -(MaskedXorUint32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) -(MaskedXorUint64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) -(MaskedXorUint64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) -(MaskedXorUint64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) +(LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) +(LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) +(LessEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask))) +(LessEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask))) +(LessEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask))) +(LessEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask))) +(LessEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 
[2] x y (VPMOVVec8x16ToM mask))) +(LessEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask))) +(LessEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask))) +(LessEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask))) +(LessEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask))) +(LessEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask))) +(LessEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask))) +(LessEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask))) +(LessEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask))) +(LessEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask))) +(LessEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask))) +(LessEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask))) +(LessEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask))) +(LessEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask))) +(LessEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask))) +(LessEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask))) +(LessEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask))) +(LessEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask))) +(LessEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask))) +(LessEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask))) +(LessEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask))) +(LessEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask))) +(LessEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask))) +(LessEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask))) +(LessMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask))) +(LessMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask))) +(LessMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask))) +(LessMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask))) +(LessMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask))) +(LessMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask))) +(LessMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask))) +(LessMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask))) +(LessMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask))) +(LessMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask))) +(LessMaskedInt16x16 x y mask) => 
(VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask)))
+(LessMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask)))
+(LessMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask)))
+(LessMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask)))
+(LessMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask)))
+(LessMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask)))
+(LessMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask)))
+(LessMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask)))
+(LessMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask)))
+(LessMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask)))
+(LessMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask)))
+(LessMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask)))
+(LessMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask)))
+(LessMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask)))
+(LessMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask)))
+(LessMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask)))
+(LessMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask)))
+(LessMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask)))
+(LessMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM mask)))
+(LessMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask)))
 (MaxFloat32x4 ...) => (VMAXPS128 ...)
 (MaxFloat32x8 ...) => (VMAXPS256 ...)
 (MaxFloat32x16 ...) => (VMAXPS512 ...)
@@ -1151,6 +695,36 @@
 (MaxUint64x2 ...) => (VPMAXUQ128 ...)
 (MaxUint64x4 ...) => (VPMAXUQ256 ...)
 (MaxUint64x8 ...) => (VPMAXUQ512 ...)
+(MaxMaskedFloat32x4 x y mask) => (VMAXPSMasked128 x y (VPMOVVec32x4ToM mask)) +(MaxMaskedFloat32x8 x y mask) => (VMAXPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MaxMaskedFloat32x16 x y mask) => (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask)) +(MaxMaskedFloat64x2 x y mask) => (VMAXPDMasked128 x y (VPMOVVec64x2ToM mask)) +(MaxMaskedFloat64x4 x y mask) => (VMAXPDMasked256 x y (VPMOVVec64x4ToM mask)) +(MaxMaskedFloat64x8 x y mask) => (VMAXPDMasked512 x y (VPMOVVec64x8ToM mask)) +(MaxMaskedInt8x16 x y mask) => (VPMAXSBMasked128 x y (VPMOVVec8x16ToM mask)) +(MaxMaskedInt8x32 x y mask) => (VPMAXSBMasked256 x y (VPMOVVec8x32ToM mask)) +(MaxMaskedInt8x64 x y mask) => (VPMAXSBMasked512 x y (VPMOVVec8x64ToM mask)) +(MaxMaskedInt16x8 x y mask) => (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaxMaskedInt16x16 x y mask) => (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaxMaskedInt16x32 x y mask) => (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaxMaskedInt32x4 x y mask) => (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaxMaskedInt32x8 x y mask) => (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaxMaskedInt32x16 x y mask) => (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaxMaskedInt64x2 x y mask) => (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaxMaskedInt64x4 x y mask) => (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaxMaskedInt64x8 x y mask) => (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask)) +(MaxMaskedUint8x16 x y mask) => (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask)) +(MaxMaskedUint8x32 x y mask) => (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask)) +(MaxMaskedUint8x64 x y mask) => (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask)) +(MaxMaskedUint16x8 x y mask) => (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask)) +(MaxMaskedUint16x16 x y mask) => (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask)) +(MaxMaskedUint16x32 x y mask) => (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask)) +(MaxMaskedUint32x4 x y mask) => (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask)) +(MaxMaskedUint32x8 x y mask) => (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask)) +(MaxMaskedUint32x16 x y mask) => (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask)) +(MaxMaskedUint64x2 x y mask) => (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask)) +(MaxMaskedUint64x4 x y mask) => (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask)) +(MaxMaskedUint64x8 x y mask) => (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask)) (MinFloat32x4 ...) => (VMINPS128 ...) (MinFloat32x8 ...) => (VMINPS256 ...) (MinFloat32x16 ...) => (VMINPS512 ...) @@ -1181,6 +755,36 @@ (MinUint64x2 ...) => (VPMINUQ128 ...) (MinUint64x4 ...) => (VPMINUQ256 ...) (MinUint64x8 ...) => (VPMINUQ512 ...) 
+(MinMaskedFloat32x4 x y mask) => (VMINPSMasked128 x y (VPMOVVec32x4ToM mask)) +(MinMaskedFloat32x8 x y mask) => (VMINPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MinMaskedFloat32x16 x y mask) => (VMINPSMasked512 x y (VPMOVVec32x16ToM mask)) +(MinMaskedFloat64x2 x y mask) => (VMINPDMasked128 x y (VPMOVVec64x2ToM mask)) +(MinMaskedFloat64x4 x y mask) => (VMINPDMasked256 x y (VPMOVVec64x4ToM mask)) +(MinMaskedFloat64x8 x y mask) => (VMINPDMasked512 x y (VPMOVVec64x8ToM mask)) +(MinMaskedInt8x16 x y mask) => (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask)) +(MinMaskedInt8x32 x y mask) => (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask)) +(MinMaskedInt8x64 x y mask) => (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask)) +(MinMaskedInt16x8 x y mask) => (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask)) +(MinMaskedInt16x16 x y mask) => (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask)) +(MinMaskedInt16x32 x y mask) => (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask)) +(MinMaskedInt32x4 x y mask) => (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask)) +(MinMaskedInt32x8 x y mask) => (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask)) +(MinMaskedInt32x16 x y mask) => (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask)) +(MinMaskedInt64x2 x y mask) => (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask)) +(MinMaskedInt64x4 x y mask) => (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask)) +(MinMaskedInt64x8 x y mask) => (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask)) +(MinMaskedUint8x16 x y mask) => (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask)) +(MinMaskedUint8x32 x y mask) => (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask)) +(MinMaskedUint8x64 x y mask) => (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask)) +(MinMaskedUint16x8 x y mask) => (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask)) +(MinMaskedUint16x16 x y mask) => (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask)) +(MinMaskedUint16x32 x y mask) => (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask)) +(MinMaskedUint32x4 x y mask) => (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask)) +(MinMaskedUint32x8 x y mask) => (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask)) +(MinMaskedUint32x16 x y mask) => (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask)) +(MinMaskedUint64x2 x y mask) => (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask)) +(MinMaskedUint64x4 x y mask) => (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask)) +(MinMaskedUint64x8 x y mask) => (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask)) (MulFloat32x4 ...) => (VMULPS128 ...) (MulFloat32x8 ...) => (VMULPS256 ...) (MulFloat32x16 ...) => (VMULPS512 ...) @@ -1193,6 +797,12 @@ (MulByPowOf2Float64x2 ...) => (VSCALEFPD128 ...) (MulByPowOf2Float64x4 ...) => (VSCALEFPD256 ...) (MulByPowOf2Float64x8 ...) => (VSCALEFPD512 ...) +(MulByPowOf2MaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask)) +(MulByPowOf2MaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MulByPowOf2MaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask)) +(MulByPowOf2MaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask)) +(MulByPowOf2MaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask)) +(MulByPowOf2MaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask)) (MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...) (MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...) (MulEvenWidenInt64x2 ...) => (VPMULDQ128 ...) @@ -1203,12 +813,24 @@ (MulEvenWidenUint64x2 ...) => (VPMULUDQ128 ...) (MulEvenWidenUint64x4 ...) => (VPMULUDQ256 ...) (MulEvenWidenUint64x8 ...) => (VPMULUDQ512 ...) 
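MulEvenWiden maps onto VPMULDQ/VPMULUDQ, which multiply only the even-indexed 32-bit elements (the low half of each 64-bit lane) and widen the products to 64 bits; the masked forms below add the usual K-register conversion. A one-lane scalar sketch of the signed case, for illustration only:

	// One 64-bit lane of MulEvenWidenInt32x4 (VPMULDQ): the low 32 bits of
	// each 64-bit lane are sign-extended and multiplied into a 64-bit product.
	func mulEvenWidenLane(xLane, yLane uint64) int64 {
		return int64(int32(uint32(xLane))) * int64(int32(uint32(yLane)))
	}

The unsigned rules use VPMULUDQ, which is the same operation with zero extension in place of sign extension.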
+(MulEvenWidenMaskedInt64x2 x y mask) => (VPMULDQMasked128 x y (VPMOVVec64x2ToM mask)) +(MulEvenWidenMaskedInt64x4 x y mask) => (VPMULDQMasked256 x y (VPMOVVec64x4ToM mask)) +(MulEvenWidenMaskedInt64x8 x y mask) => (VPMULDQMasked512 x y (VPMOVVec64x8ToM mask)) +(MulEvenWidenMaskedUint64x2 x y mask) => (VPMULUDQMasked128 x y (VPMOVVec64x2ToM mask)) +(MulEvenWidenMaskedUint64x4 x y mask) => (VPMULUDQMasked256 x y (VPMOVVec64x4ToM mask)) +(MulEvenWidenMaskedUint64x8 x y mask) => (VPMULUDQMasked512 x y (VPMOVVec64x8ToM mask)) (MulHighInt16x8 ...) => (VPMULHW128 ...) (MulHighInt16x16 ...) => (VPMULHW256 ...) (MulHighInt16x32 ...) => (VPMULHW512 ...) (MulHighUint16x8 ...) => (VPMULHUW128 ...) (MulHighUint16x16 ...) => (VPMULHUW256 ...) (MulHighUint16x32 ...) => (VPMULHUW512 ...) +(MulHighMaskedInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask)) +(MulHighMaskedInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask)) +(MulHighMaskedInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask)) +(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask)) +(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask)) +(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask)) (MulLowInt16x8 ...) => (VPMULLW128 ...) (MulLowInt16x16 ...) => (VPMULLW256 ...) (MulLowInt16x32 ...) => (VPMULLW512 ...) @@ -1218,6 +840,21 @@ (MulLowInt64x2 ...) => (VPMULLQ128 ...) (MulLowInt64x4 ...) => (VPMULLQ256 ...) (MulLowInt64x8 ...) => (VPMULLQ512 ...) +(MulLowMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) +(MulLowMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) +(MulLowMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) +(MulLowMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) +(MulLowMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) +(MulLowMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) +(MulLowMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) +(MulLowMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) +(MulLowMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) +(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM mask)) +(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM mask)) +(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM mask)) +(MulMaskedFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM mask)) +(MulMaskedFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM mask)) +(MulMaskedFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM mask)) (NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y) (NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y) (NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y)) @@ -1248,6 +885,36 @@ (NotEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y)) (NotEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y)) (NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) +(NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) +(NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) +(NotEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask))) +(NotEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y 
(VPMOVVec64x2ToM mask))) +(NotEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask))) +(NotEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask))) +(NotEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask))) +(NotEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask))) +(NotEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask))) +(NotEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask))) +(NotEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask))) +(NotEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask))) +(NotEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask))) +(NotEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask))) +(NotEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask))) +(NotEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask))) +(NotEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask))) +(NotEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask))) +(NotEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask))) +(NotEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask))) +(NotEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask))) +(NotEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask))) +(NotEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask))) +(NotEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM mask))) +(NotEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM mask))) +(NotEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask))) +(NotEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask))) +(NotEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask))) +(NotEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask))) +(NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) (OrInt8x16 ...) => (VPOR128 ...) (OrInt8x32 ...) => (VPOR256 ...) (OrInt16x8 ...) => (VPOR128 ...) @@ -1268,12 +935,30 @@ (OrUint64x2 ...) => (VPOR128 ...) (OrUint64x4 ...) => (VPOR256 ...) (OrUint64x8 ...) => (VPORQ512 ...) 
+(OrMaskedInt32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) +(OrMaskedInt32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) +(OrMaskedInt32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) +(OrMaskedInt64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) +(OrMaskedInt64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) +(OrMaskedInt64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) +(OrMaskedUint32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) +(OrMaskedUint32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) +(OrMaskedUint32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) +(OrMaskedUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) +(OrMaskedUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) +(OrMaskedUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) (PairDotProdInt16x8 ...) => (VPMADDWD128 ...) (PairDotProdInt16x16 ...) => (VPMADDWD256 ...) (PairDotProdInt16x32 ...) => (VPMADDWD512 ...) (PairDotProdAccumulateInt32x4 ...) => (VPDPWSSD128 ...) (PairDotProdAccumulateInt32x8 ...) => (VPDPWSSD256 ...) (PairDotProdAccumulateInt32x16 ...) => (VPDPWSSD512 ...) +(PairDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM mask)) +(PairDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM mask)) +(PairDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM mask)) +(PairDotProdMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask)) +(PairDotProdMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask)) +(PairDotProdMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask)) (PairwiseAddFloat32x4 ...) => (VHADDPS128 ...) (PairwiseAddFloat32x8 ...) => (VHADDPS256 ...) (PairwiseAddFloat64x2 ...) => (VHADDPD128 ...) @@ -1322,6 +1007,30 @@ (PopCountUint64x2 ...) => (VPOPCNTQ128 ...) (PopCountUint64x4 ...) => (VPOPCNTQ256 ...) (PopCountUint64x8 ...) => (VPOPCNTQ512 ...) 
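The PopCountMasked rules that follow take only a value and a mask, so they lower to the two-operand masked VPOPCNT forms; that shape suggests zeroing masking, i.e. lanes with a cleared mask produce zero rather than preserving an old value (an assumption here, not something the rules themselves state). A scalar sketch under that assumption, with math/bits standing in for the hardware population count:

	import "math/bits"

	// Scalar model of PopCountMaskedInt32x4: active lanes get the population
	// count of their bits, lanes with a cleared mask get zero.
	func popCountMaskedInt32x4(x, mask [4]int32) (r [4]int32) {
		for i := range r {
			if mask[i] != 0 {
				r[i] = int32(bits.OnesCount32(uint32(x[i])))
			}
		}
		return
	}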
+(PopCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) +(PopCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) +(PopCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) +(PopCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) +(PopCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) +(PopCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) +(PopCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) +(PopCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) +(PopCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) +(PopCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) +(PopCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) +(PopCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) +(PopCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) +(PopCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) +(PopCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) +(PopCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) +(PopCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) +(PopCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) +(PopCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) +(PopCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) +(PopCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) +(PopCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) +(PopCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) +(PopCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) (RotateAllLeftInt32x4 [a] x) => (VPROLD128 [a] x) (RotateAllLeftInt32x8 [a] x) => (VPROLD256 [a] x) (RotateAllLeftInt32x16 [a] x) => (VPROLD512 [a] x) @@ -1334,6 +1043,18 @@ (RotateAllLeftUint64x2 [a] x) => (VPROLQ128 [a] x) (RotateAllLeftUint64x4 [a] x) => (VPROLQ256 [a] x) (RotateAllLeftUint64x8 [a] x) => (VPROLQ512 [a] x) +(RotateAllLeftMaskedInt32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(RotateAllLeftMaskedInt32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(RotateAllLeftMaskedInt32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(RotateAllLeftMaskedInt64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(RotateAllLeftMaskedInt64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(RotateAllLeftMaskedInt64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(RotateAllLeftMaskedUint32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(RotateAllLeftMaskedUint32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(RotateAllLeftMaskedUint32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(RotateAllLeftMaskedUint64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(RotateAllLeftMaskedUint64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(RotateAllLeftMaskedUint64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) (RotateAllRightInt32x4 [a] x) => (VPRORD128 [a] x) (RotateAllRightInt32x8 [a] x) => (VPRORD256 [a] x) (RotateAllRightInt32x16 [a] x) => (VPRORD512 [a] x) @@ -1346,6 +1067,18 @@ 
(RotateAllRightUint64x2 [a] x) => (VPRORQ128 [a] x) (RotateAllRightUint64x4 [a] x) => (VPRORQ256 [a] x) (RotateAllRightUint64x8 [a] x) => (VPRORQ512 [a] x) +(RotateAllRightMaskedInt32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(RotateAllRightMaskedInt32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(RotateAllRightMaskedInt32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(RotateAllRightMaskedInt64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(RotateAllRightMaskedInt64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(RotateAllRightMaskedInt64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) +(RotateAllRightMaskedUint32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) +(RotateAllRightMaskedUint32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) +(RotateAllRightMaskedUint32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) +(RotateAllRightMaskedUint64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) +(RotateAllRightMaskedUint64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) +(RotateAllRightMaskedUint64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) (RotateLeftInt32x4 ...) => (VPROLVD128 ...) (RotateLeftInt32x8 ...) => (VPROLVD256 ...) (RotateLeftInt32x16 ...) => (VPROLVD512 ...) @@ -1358,6 +1091,18 @@ (RotateLeftUint64x2 ...) => (VPROLVQ128 ...) (RotateLeftUint64x4 ...) => (VPROLVQ256 ...) (RotateLeftUint64x8 ...) => (VPROLVQ512 ...) +(RotateLeftMaskedInt32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(RotateLeftMaskedInt32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(RotateLeftMaskedInt32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(RotateLeftMaskedInt64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(RotateLeftMaskedInt64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(RotateLeftMaskedInt64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(RotateLeftMaskedUint32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(RotateLeftMaskedUint32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(RotateLeftMaskedUint32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(RotateLeftMaskedUint64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(RotateLeftMaskedUint64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(RotateLeftMaskedUint64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) (RotateRightInt32x4 ...) => (VPRORVD128 ...) (RotateRightInt32x8 ...) => (VPRORVD256 ...) (RotateRightInt32x16 ...) => (VPRORVD512 ...) @@ -1370,6 +1115,18 @@ (RotateRightUint64x2 ...) => (VPRORVQ128 ...) (RotateRightUint64x4 ...) => (VPRORVQ256 ...) (RotateRightUint64x8 ...) => (VPRORVQ512 ...) 
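The rotate rules above come in two flavors: RotateAll* takes the rotate count as an immediate ([a], lowering to VPROLD/VPRORD and their Q forms), while RotateLeft*/RotateRight* take a per-lane count vector (VPROLVD/VPRORVQ and friends). A one-lane sketch using math/bits, noting that a right rotation by a is the same as a left rotation by -a:

	import "math/bits"

	// One 32-bit lane of RotateAllLeft / RotateAllRight (illustration only).
	func rotateAllLeft32(x uint32, a int) uint32  { return bits.RotateLeft32(x, a) }
	func rotateAllRight32(x uint32, a int) uint32 { return bits.RotateLeft32(x, -a) }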
+(RotateRightMaskedInt32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) +(RotateRightMaskedInt32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) +(RotateRightMaskedInt32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) +(RotateRightMaskedInt64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) +(RotateRightMaskedInt64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) +(RotateRightMaskedInt64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) +(RotateRightMaskedUint32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) +(RotateRightMaskedUint32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) +(RotateRightMaskedUint32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) +(RotateRightMaskedUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) +(RotateRightMaskedUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) +(RotateRightMaskedUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) (RoundFloat32x4 x) => (VROUNDPS128 [0] x) (RoundFloat32x8 x) => (VROUNDPS256 [0] x) (RoundFloat64x2 x) => (VROUNDPD128 [0] x) @@ -1380,6 +1137,12 @@ (RoundWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x) (RoundWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x) (RoundWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x) +(RoundWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) +(RoundWithPrecisionMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) +(RoundWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) +(RoundWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) +(RoundWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) +(RoundWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) (SaturatedAddInt8x16 ...) => (VPADDSB128 ...) (SaturatedAddInt8x32 ...) => (VPADDSB256 ...) (SaturatedAddInt8x64 ...) => (VPADDSB512 ...) @@ -1392,9 +1155,24 @@ (SaturatedAddUint16x8 ...) => (VPADDSW128 ...) (SaturatedAddUint16x16 ...) => (VPADDSW256 ...) (SaturatedAddUint16x32 ...) => (VPADDSW512 ...) +(SaturatedAddMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) +(SaturatedAddMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) +(SaturatedAddMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) +(SaturatedAddMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) +(SaturatedAddMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) +(SaturatedAddMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) +(SaturatedAddMaskedUint8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) +(SaturatedAddMaskedUint8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) +(SaturatedAddMaskedUint8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) +(SaturatedAddMaskedUint16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) +(SaturatedAddMaskedUint16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) +(SaturatedAddMaskedUint16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) (SaturatedPairDotProdAccumulateInt32x4 ...) => (VPDPWSSDS128 ...) (SaturatedPairDotProdAccumulateInt32x8 ...) => (VPDPWSSDS256 ...) (SaturatedPairDotProdAccumulateInt32x16 ...) 
=> (VPDPWSSDS512 ...) +(SaturatedPairDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask)) +(SaturatedPairDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask)) +(SaturatedPairDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask)) (SaturatedPairwiseAddInt16x8 ...) => (VPHADDSW128 ...) (SaturatedPairwiseAddInt16x16 ...) => (VPHADDSW256 ...) (SaturatedPairwiseSubInt16x8 ...) => (VPHSUBSW128 ...) @@ -1411,15 +1189,36 @@ (SaturatedSubUint16x8 ...) => (VPSUBSW128 ...) (SaturatedSubUint16x16 ...) => (VPSUBSW256 ...) (SaturatedSubUint16x32 ...) => (VPSUBSW512 ...) +(SaturatedSubMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) +(SaturatedSubMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) +(SaturatedSubMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) +(SaturatedSubMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) +(SaturatedSubMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) +(SaturatedSubMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) +(SaturatedSubMaskedUint8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) +(SaturatedSubMaskedUint8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) +(SaturatedSubMaskedUint8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) +(SaturatedSubMaskedUint16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) +(SaturatedSubMaskedUint16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) +(SaturatedSubMaskedUint16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) (SaturatedUnsignedSignedPairDotProdUint8x16 ...) => (VPMADDUBSW128 ...) (SaturatedUnsignedSignedPairDotProdUint8x32 ...) => (VPMADDUBSW256 ...) (SaturatedUnsignedSignedPairDotProdUint8x64 ...) => (VPMADDUBSW512 ...) +(SaturatedUnsignedSignedPairDotProdMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask)) +(SaturatedUnsignedSignedPairDotProdMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask)) +(SaturatedUnsignedSignedPairDotProdMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask)) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSDS128 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSDS256 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSDS512 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...) (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...) 
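The dot-product-accumulate rules above (VPDPWSSD, VPDPWSSDS, VPDPBUSD, VPDPBUSDS) are three-operand: an accumulator plus the two multiplicand vectors, with the masked forms adding the K-register conversion as a fourth argument. A scalar sketch of the plain word form, PairDotProdAccumulateInt32x4 (VPDPWSSD), where each int32 lane accumulates the dot product of the two int16 pairs occupying the same 32 bits; the saturated and byte-input variants differ in element width and in whether the final add saturates. Illustration only, not the package's implementation:

	// Scalar model of PairDotProdAccumulateInt32x4 (VPDPWSSD).
	func pairDotProdAccumulateInt32x4(acc [4]int32, x, y [8]int16) [4]int32 {
		for i := 0; i < 4; i++ {
			acc[i] += int32(x[2*i])*int32(y[2*i]) + int32(x[2*i+1])*int32(y[2*i+1])
		}
		return acc
	}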
+(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) +(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) +(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) +(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) +(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) +(SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) (Set128Float32x8 [a] x y) => (VINSERTF128256 [a] x y) (Set128Float64x4 [a] x y) => (VINSERTF128256 [a] x y) (Set128Int8x32 [a] x y) => (VINSERTI128256 [a] x y) @@ -1470,6 +1269,30 @@ (ShiftAllLeftAndFillUpperFromUint64x2 [a] x y) => (VPSHLDQ128 [a] x y) (ShiftAllLeftAndFillUpperFromUint64x4 [a] x y) => (VPSHLDQ256 [a] x y) (ShiftAllLeftAndFillUpperFromUint64x8 [a] x y) => (VPSHLDQ512 [a] x y) +(ShiftAllLeftAndFillUpperFromMaskedInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(ShiftAllLeftAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllLeftMaskedUint64x4 x y 
mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftAllRightInt16x8 ...) => (VPSRLW128 ...) (ShiftAllRightInt16x16 ...) => (VPSRLW256 ...) (ShiftAllRightInt32x4 ...) => (VPSRLD128 ...) @@ -1502,6 +1325,30 @@ (ShiftAllRightAndFillUpperFromUint64x2 [a] x y) => (VPSHRDQ128 [a] x y) (ShiftAllRightAndFillUpperFromUint64x4 [a] x y) => (VPSHRDQ256 [a] x y) (ShiftAllRightAndFillUpperFromUint64x8 [a] x y) => (VPSHRDQ512 [a] x y) +(ShiftAllRightAndFillUpperFromMaskedInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftAllRightSignExtendedInt16x8 ...) => (VPSRAW128 ...) (ShiftAllRightSignExtendedInt16x16 ...) => (VPSRAW256 ...) (ShiftAllRightSignExtendedInt32x4 ...) => (VPSRAD128 ...) @@ -1509,6 +1356,9 @@ (ShiftAllRightSignExtendedInt64x2 ...) => (VPSRAQ128 ...) (ShiftAllRightSignExtendedInt64x4 ...) => (VPSRAQ256 ...) (ShiftAllRightSignExtendedInt64x8 ...) => (VPSRAQ512 ...) 
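The shift rules in this file keep two distinct right-shift families: ShiftAllRight* lowers to VPSRL* (logical, zero-filling) and ShiftAllRightSignExtended* to VPSRA* (arithmetic, sign-filling); the per-lane ShiftRight*/ShiftRightSignExtended* rules further down make the same split with VPSRLV*/VPSRAV*. A one-lane sketch of the difference, for illustration:

	// Logical right shift: vacated high bits are zero (VPSRL*, VPSRLV*).
	func shiftRight(x int32, n uint) int32 { return int32(uint32(x) >> n) }

	// Arithmetic right shift: vacated bits copy the sign bit (VPSRA*, VPSRAV*).
	func shiftRightSignExtended(x int32, n uint) int32 { return x >> n }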
+(ShiftAllRightSignExtendedMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightSignExtendedMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightSignExtendedMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) @@ -1545,6 +1395,42 @@ (ShiftLeftAndFillUpperFromUint64x2 ...) => (VPSHLDVQ128 ...) (ShiftLeftAndFillUpperFromUint64x4 ...) => (VPSHLDVQ256 ...) (ShiftLeftAndFillUpperFromUint64x8 ...) => (VPSHLDVQ512 ...) +(ShiftLeftAndFillUpperFromMaskedInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(ShiftLeftAndFillUpperFromMaskedInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(ShiftLeftAndFillUpperFromMaskedInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(ShiftLeftAndFillUpperFromMaskedInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(ShiftLeftAndFillUpperFromMaskedInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(ShiftLeftAndFillUpperFromMaskedInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(ShiftLeftAndFillUpperFromMaskedInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(ShiftLeftAndFillUpperFromMaskedInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(ShiftLeftAndFillUpperFromMaskedInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(ShiftLeftAndFillUpperFromMaskedUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(ShiftLeftMaskedInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftLeftMaskedInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftLeftMaskedInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftLeftMaskedInt32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftLeftMaskedInt32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftLeftMaskedInt32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftLeftMaskedInt64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftLeftMaskedInt64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftLeftMaskedInt64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftLeftMaskedUint16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftLeftMaskedUint16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftLeftMaskedUint16x32 x y mask) => (VPSLLVWMasked512 x y 
(VPMOVVec16x32ToM mask)) +(ShiftLeftMaskedUint32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftLeftMaskedUint32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftLeftMaskedUint32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftLeftMaskedUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftLeftMaskedUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftLeftMaskedUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftRightInt16x8 ...) => (VPSRLVW128 ...) (ShiftRightInt16x16 ...) => (VPSRLVW256 ...) (ShiftRightInt16x32 ...) => (VPSRLVW512 ...) @@ -1581,6 +1467,42 @@ (ShiftRightAndFillUpperFromUint64x2 ...) => (VPSHRDVQ128 ...) (ShiftRightAndFillUpperFromUint64x4 ...) => (VPSHRDVQ256 ...) (ShiftRightAndFillUpperFromUint64x8 ...) => (VPSHRDVQ512 ...) +(ShiftRightAndFillUpperFromMaskedInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(ShiftRightAndFillUpperFromMaskedInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(ShiftRightAndFillUpperFromMaskedInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(ShiftRightAndFillUpperFromMaskedInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(ShiftRightAndFillUpperFromMaskedInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(ShiftRightAndFillUpperFromMaskedInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(ShiftRightAndFillUpperFromMaskedInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(ShiftRightAndFillUpperFromMaskedInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(ShiftRightAndFillUpperFromMaskedInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(ShiftRightAndFillUpperFromMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(ShiftRightMaskedInt16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftRightMaskedInt16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftRightMaskedInt16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftRightMaskedInt32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftRightMaskedInt32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftRightMaskedInt32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftRightMaskedInt64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftRightMaskedInt64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftRightMaskedInt64x8 x y mask) => (VPSRLVQMasked512 x 
y (VPMOVVec64x8ToM mask)) +(ShiftRightMaskedUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftRightMaskedUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftRightMaskedUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftRightMaskedUint32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftRightMaskedUint32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftRightMaskedUint32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftRightSignExtendedInt16x8 ...) => (VPSRAVW128 ...) (ShiftRightSignExtendedInt16x16 ...) => (VPSRAVW256 ...) (ShiftRightSignExtendedInt16x32 ...) => (VPSRAVW512 ...) @@ -1599,6 +1521,24 @@ (ShiftRightSignExtendedUint64x2 ...) => (VPSRAVQ128 ...) (ShiftRightSignExtendedUint64x4 ...) => (VPSRAVQ256 ...) (ShiftRightSignExtendedUint64x8 ...) => (VPSRAVQ512 ...) +(ShiftRightSignExtendedMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftRightSignExtendedMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftRightSignExtendedMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftRightSignExtendedMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftRightSignExtendedMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftRightSignExtendedMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftRightSignExtendedMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftRightSignExtendedMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftRightSignExtendedMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftRightSignExtendedMaskedUint16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftRightSignExtendedMaskedUint16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftRightSignExtendedMaskedUint16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftRightSignExtendedMaskedUint32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftRightSignExtendedMaskedUint32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftRightSignExtendedMaskedUint32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftRightSignExtendedMaskedUint64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftRightSignExtendedMaskedUint64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftRightSignExtendedMaskedUint64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) (SignInt8x16 ...) => (VPSIGNB128 ...) (SignInt8x32 ...) => (VPSIGNB256 ...) (SignInt16x8 ...) => (VPSIGNW128 ...) @@ -1611,6 +1551,12 @@ (SqrtFloat64x2 ...) => (VSQRTPD128 ...) (SqrtFloat64x4 ...) => (VSQRTPD256 ...) (SqrtFloat64x8 ...) => (VSQRTPD512 ...) 
+(SqrtMaskedFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) +(SqrtMaskedFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) +(SqrtMaskedFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) +(SqrtMaskedFloat64x2 x mask) => (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) +(SqrtMaskedFloat64x4 x mask) => (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) +(SqrtMaskedFloat64x8 x mask) => (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) (SubFloat32x4 ...) => (VSUBPS128 ...) (SubFloat32x8 ...) => (VSUBPS256 ...) (SubFloat32x16 ...) => (VSUBPS512 ...) @@ -1641,6 +1587,36 @@ (SubUint64x2 ...) => (VPSUBQ128 ...) (SubUint64x4 ...) => (VPSUBQ256 ...) (SubUint64x8 ...) => (VPSUBQ512 ...) +(SubMaskedFloat32x4 x y mask) => (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) +(SubMaskedFloat32x8 x y mask) => (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) +(SubMaskedFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) +(SubMaskedFloat64x2 x y mask) => (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) +(SubMaskedFloat64x4 x y mask) => (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) +(SubMaskedFloat64x8 x y mask) => (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) +(SubMaskedInt8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) +(SubMaskedInt8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) +(SubMaskedInt8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) +(SubMaskedInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) +(SubMaskedInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) +(SubMaskedInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) +(SubMaskedInt32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) +(SubMaskedInt32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) +(SubMaskedInt32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) +(SubMaskedInt64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) +(SubMaskedInt64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) +(SubMaskedInt64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) +(SubMaskedUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) +(SubMaskedUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) +(SubMaskedUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) +(SubMaskedUint16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) +(SubMaskedUint16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) +(SubMaskedUint16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) +(SubMaskedUint32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) +(SubMaskedUint32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) +(SubMaskedUint32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) +(SubMaskedUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) +(SubMaskedUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) +(SubMaskedUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) (TruncFloat32x4 x) => (VROUNDPS128 [3] x) (TruncFloat32x8 x) => (VROUNDPS256 [3] x) (TruncFloat64x2 x) => (VROUNDPD128 [3] x) @@ -1651,12 +1627,24 @@ (TruncWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x) (TruncWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x) (TruncWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x) +(TruncWithPrecisionMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) +(TruncWithPrecisionMaskedFloat32x8 [a] x 
mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) +(TruncWithPrecisionMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) +(TruncWithPrecisionMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) +(TruncWithPrecisionMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) +(TruncWithPrecisionMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) (UnsignedSignedQuadDotProdAccumulateInt32x4 ...) => (VPDPBUSD128 ...) (UnsignedSignedQuadDotProdAccumulateInt32x8 ...) => (VPDPBUSD256 ...) (UnsignedSignedQuadDotProdAccumulateInt32x16 ...) => (VPDPBUSD512 ...) (UnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSD128 ...) (UnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSD256 ...) (UnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSD512 ...) +(UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) +(UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) +(UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) +(UnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) +(UnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) +(UnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) (XorInt8x16 ...) => (VPXOR128 ...) (XorInt8x32 ...) => (VPXOR256 ...) (XorInt16x8 ...) => (VPXOR128 ...) @@ -1677,3 +1665,15 @@ (XorUint64x2 ...) => (VPXOR128 ...) (XorUint64x4 ...) => (VPXOR256 ...) (XorUint64x8 ...) => (VPXORQ512 ...) 
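Every addition in this rules file follows the same renaming scheme: the Masked qualifier now sits immediately before the element-type suffix rather than at the front of the op name (SubMaskedInt32x4 where the old spelling was presumably MaskedSubInt32x4). A rough sketch of that textual transformation, assuming every old name is the Masked prefix followed by the op and an Int/Uint/Float shape suffix; this is only an illustration of the pattern, not the actual generator behind this CL:

	import "regexp"

	// renameMaskedOp rewrites Masked<Op><Shape> to <Op>Masked<Shape>,
	// e.g. MaskedSubInt32x4 -> SubMaskedInt32x4. Already-renamed identifiers
	// are left alone because "Masked" no longer starts at a word boundary.
	var maskedOpRE = regexp.MustCompile(`\bMasked([A-Za-z]+?)((?:Int|Uint|Float)\d+x\d+)\b`)

	func renameMaskedOp(s string) string {
		return maskedOpRE.ReplaceAllString(s, "${1}Masked${2}")
	}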
+(XorMaskedInt32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) +(XorMaskedInt32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) +(XorMaskedInt32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) +(XorMaskedInt64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) +(XorMaskedInt64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) +(XorMaskedInt64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) +(XorMaskedUint32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) +(XorMaskedUint32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) +(XorMaskedUint32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) +(XorMaskedUint64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) +(XorMaskedUint64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) +(XorMaskedUint64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 892ecc4043..5abaa4a0bc 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -4,836 +4,836 @@ package main func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, fpgp regInfo) []opData { return []opData{ {name: "VADDPS512", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VRCP14PS512", argLength: 1, reg: fp11, asm: "VRCP14PS", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VRSQRT14PS512", argLength: 1, reg: fp11, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VDIVPS512", argLength: 2, reg: fp21, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VFMADD213PS512", argLength: 3, reg: fp31, asm: "VFMADD213PS", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VFMADDSUB213PS512", argLength: 3, reg: fp31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VFMSUBADD213PS512", argLength: 3, reg: fp31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VADDPSMasked512", argLength: 3, reg: fp2kfp, asm: "VADDPS", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VRCP14PS512", argLength: 1, reg: fp11, asm: "VRCP14PS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VRCP14PSMasked512", argLength: 2, reg: fpkfp, asm: "VRCP14PS", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VRSQRT14PS512", argLength: 1, reg: fp11, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VRSQRT14PSMasked512", argLength: 2, reg: fpkfp, asm: "VRSQRT14PS", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VDIVPS512", argLength: 2, reg: fp21, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VDIVPSMasked512", argLength: 3, reg: fp2kfp, asm: "VDIVPS", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VFMADD213PS512", argLength: 3, reg: fp31, asm: "VFMADD213PS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VFMADD213PSMasked512", argLength: 4, reg: fp3kfp, asm: "VFMADD213PS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VFMADDSUB213PS512", argLength: 3, reg: fp31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VFMADDSUB213PSMasked512", argLength: 4, 
reg: fp3kfp, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VFMSUBADD213PS512", argLength: 3, reg: fp31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VFMSUBADD213PSMasked512", argLength: 4, reg: fp3kfp, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VMAXPSMasked512", argLength: 3, reg: fp2kfp, asm: "VMAXPS", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VMINPSMasked512", argLength: 3, reg: fp2kfp, asm: "VMINPS", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VMULPSMasked512", argLength: 3, reg: fp2kfp, asm: "VMULPS", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VSCALEFPSMasked512", argLength: 3, reg: fp2kfp, asm: "VSCALEFPS", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VSQRTPSMasked512", argLength: 2, reg: fpkfp, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VSUBPSMasked512", argLength: 3, reg: fp2kfp, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VMAXPS512", argLength: 2, reg: fp21, asm: "VMAXPS", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VMAXPSMasked512", argLength: 3, reg: fp2kfp, asm: "VMAXPS", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VMINPS512", argLength: 2, reg: fp21, asm: "VMINPS", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VMINPSMasked512", argLength: 3, reg: fp2kfp, asm: "VMINPS", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VMULPS512", argLength: 2, reg: fp21, asm: "VMULPS", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VSCALEFPS512", argLength: 2, reg: fp21, asm: "VSCALEFPS", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VSCALEFPSMasked512", argLength: 3, reg: fp2kfp, asm: "VSCALEFPS", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VMULPSMasked512", argLength: 3, reg: fp2kfp, asm: "VMULPS", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VSQRTPS512", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VSQRTPSMasked512", argLength: 2, reg: fpkfp, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VSUBPS512", argLength: 2, reg: fp21, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VSUBPSMasked512", argLength: 3, reg: fp2kfp, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VADDPS128", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VADDPSMasked128", argLength: 3, reg: fp2kfp, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VADDSUBPS128", argLength: 2, reg: fp21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRCP14PS128", argLength: 1, reg: fp11, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VRSQRTPS128", argLength: 1, reg: fp11, asm: "VRSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VDIVPS128", argLength: 2, reg: fp21, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VFMADD213PS128", argLength: 3, reg: fp31, asm: "VFMADD213PS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VFMADDSUB213PS128", argLength: 3, reg: fp31, asm: "VFMADDSUB213PS", commutative: 
false, typ: "Vec128", resultInArg0: true}, - {name: "VFMSUBADD213PS128", argLength: 3, reg: fp31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VADDPSMasked128", argLength: 3, reg: fp2kfp, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VRCP14PSMasked128", argLength: 2, reg: fpkfp, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VRSQRTPS128", argLength: 1, reg: fp11, asm: "VRSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRSQRT14PSMasked128", argLength: 2, reg: fpkfp, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VDIVPS128", argLength: 2, reg: fp21, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VDIVPSMasked128", argLength: 3, reg: fp2kfp, asm: "VDIVPS", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VFMADD213PS128", argLength: 3, reg: fp31, asm: "VFMADD213PS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VFMADD213PSMasked128", argLength: 4, reg: fp3kfp, asm: "VFMADD213PS", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VFMADDSUB213PS128", argLength: 3, reg: fp31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VFMADDSUB213PSMasked128", argLength: 4, reg: fp3kfp, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VFMSUBADD213PS128", argLength: 3, reg: fp31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VFMSUBADD213PSMasked128", argLength: 4, reg: fp3kfp, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VMAXPSMasked128", argLength: 3, reg: fp2kfp, asm: "VMAXPS", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VMINPSMasked128", argLength: 3, reg: fp2kfp, asm: "VMINPS", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VMULPSMasked128", argLength: 3, reg: fp2kfp, asm: "VMULPS", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VSCALEFPSMasked128", argLength: 3, reg: fp2kfp, asm: "VSCALEFPS", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VSQRTPSMasked128", argLength: 2, reg: fpkfp, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VSUBPSMasked128", argLength: 3, reg: fp2kfp, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VMAXPS128", argLength: 2, reg: fp21, asm: "VMAXPS", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VMAXPSMasked128", argLength: 3, reg: fp2kfp, asm: "VMAXPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VMINPS128", argLength: 2, reg: fp21, asm: "VMINPS", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VMINPSMasked128", argLength: 3, reg: fp2kfp, asm: "VMINPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VMULPS128", argLength: 2, reg: fp21, asm: "VMULPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VSCALEFPS128", argLength: 2, reg: fp21, asm: "VSCALEFPS", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VSCALEFPSMasked128", argLength: 3, reg: fp2kfp, asm: "VSCALEFPS", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VMULPSMasked128", argLength: 3, reg: fp2kfp, asm: "VMULPS", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VHADDPS128", argLength: 2, reg: fp21, asm: "VHADDPS", 
commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VHSUBPS128", argLength: 2, reg: fp21, asm: "VHSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VSQRTPS128", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VSQRTPSMasked128", argLength: 2, reg: fpkfp, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VSUBPS128", argLength: 2, reg: fp21, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VSUBPSMasked128", argLength: 3, reg: fp2kfp, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VADDPS256", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VADDPSMasked256", argLength: 3, reg: fp2kfp, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VADDSUBPS256", argLength: 2, reg: fp21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VRCP14PS256", argLength: 1, reg: fp11, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VRSQRTPS256", argLength: 1, reg: fp11, asm: "VRSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VDIVPS256", argLength: 2, reg: fp21, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VFMADD213PS256", argLength: 3, reg: fp31, asm: "VFMADD213PS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VFMADDSUB213PS256", argLength: 3, reg: fp31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VFMSUBADD213PS256", argLength: 3, reg: fp31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VADDPSMasked256", argLength: 3, reg: fp2kfp, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VRCP14PSMasked256", argLength: 2, reg: fpkfp, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VRSQRTPS256", argLength: 1, reg: fp11, asm: "VRSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VRSQRT14PSMasked256", argLength: 2, reg: fpkfp, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VDIVPS256", argLength: 2, reg: fp21, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VDIVPSMasked256", argLength: 3, reg: fp2kfp, asm: "VDIVPS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VFMADD213PS256", argLength: 3, reg: fp31, asm: "VFMADD213PS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VFMADD213PSMasked256", argLength: 4, reg: fp3kfp, asm: "VFMADD213PS", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VFMADDSUB213PS256", argLength: 3, reg: fp31, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VFMADDSUB213PSMasked256", argLength: 4, reg: fp3kfp, asm: "VFMADDSUB213PS", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VFMSUBADD213PS256", argLength: 3, reg: fp31, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VFMSUBADD213PSMasked256", argLength: 4, reg: fp3kfp, asm: "VFMSUBADD213PS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VMAXPSMasked256", argLength: 3, reg: fp2kfp, asm: "VMAXPS", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VMINPSMasked256", argLength: 3, reg: fp2kfp, asm: "VMINPS", 
commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VMULPSMasked256", argLength: 3, reg: fp2kfp, asm: "VMULPS", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VSCALEFPSMasked256", argLength: 3, reg: fp2kfp, asm: "VSCALEFPS", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VSQRTPSMasked256", argLength: 2, reg: fpkfp, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VSUBPSMasked256", argLength: 3, reg: fp2kfp, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VMAXPS256", argLength: 2, reg: fp21, asm: "VMAXPS", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VMAXPSMasked256", argLength: 3, reg: fp2kfp, asm: "VMAXPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VMINPS256", argLength: 2, reg: fp21, asm: "VMINPS", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VMINPSMasked256", argLength: 3, reg: fp2kfp, asm: "VMINPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VMULPS256", argLength: 2, reg: fp21, asm: "VMULPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VSCALEFPS256", argLength: 2, reg: fp21, asm: "VSCALEFPS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VSCALEFPSMasked256", argLength: 3, reg: fp2kfp, asm: "VSCALEFPS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VMULPSMasked256", argLength: 3, reg: fp2kfp, asm: "VMULPS", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VHADDPS256", argLength: 2, reg: fp21, asm: "VHADDPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VHSUBPS256", argLength: 2, reg: fp21, asm: "VHSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VSQRTPS256", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VSQRTPSMasked256", argLength: 2, reg: fpkfp, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VSUBPS256", argLength: 2, reg: fp21, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VSUBPSMasked256", argLength: 3, reg: fp2kfp, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VADDPD128", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VADDPDMasked128", argLength: 3, reg: fp2kfp, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VADDSUBPD128", argLength: 2, reg: fp21, asm: "VADDSUBPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRCP14PD128", argLength: 1, reg: fp11, asm: "VRCP14PD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VRSQRT14PD128", argLength: 1, reg: fp11, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VDIVPD128", argLength: 2, reg: fp21, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VFMADD213PD128", argLength: 3, reg: fp31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VFMADDSUB213PD128", argLength: 3, reg: fp31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VFMSUBADD213PD128", argLength: 3, reg: fp31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VADDPDMasked128", argLength: 3, reg: fp2kfp, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: 
"VRCP14PDMasked128", argLength: 2, reg: fpkfp, asm: "VRCP14PD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VRSQRT14PD128", argLength: 1, reg: fp11, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRSQRT14PDMasked128", argLength: 2, reg: fpkfp, asm: "VRSQRT14PD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VDIVPD128", argLength: 2, reg: fp21, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VDIVPDMasked128", argLength: 3, reg: fp2kfp, asm: "VDIVPD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VFMADD213PD128", argLength: 3, reg: fp31, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VFMADD213PDMasked128", argLength: 4, reg: fp3kfp, asm: "VFMADD213PD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VFMADDSUB213PD128", argLength: 3, reg: fp31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VFMADDSUB213PDMasked128", argLength: 4, reg: fp3kfp, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VFMSUBADD213PD128", argLength: 3, reg: fp31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VFMSUBADD213PDMasked128", argLength: 4, reg: fp3kfp, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VMAXPDMasked128", argLength: 3, reg: fp2kfp, asm: "VMAXPD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VMINPDMasked128", argLength: 3, reg: fp2kfp, asm: "VMINPD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VMULPDMasked128", argLength: 3, reg: fp2kfp, asm: "VMULPD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VSCALEFPDMasked128", argLength: 3, reg: fp2kfp, asm: "VSCALEFPD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VSQRTPDMasked128", argLength: 2, reg: fpkfp, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VSUBPDMasked128", argLength: 3, reg: fp2kfp, asm: "VSUBPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VMAXPD128", argLength: 2, reg: fp21, asm: "VMAXPD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VMAXPDMasked128", argLength: 3, reg: fp2kfp, asm: "VMAXPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VMINPD128", argLength: 2, reg: fp21, asm: "VMINPD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VMINPDMasked128", argLength: 3, reg: fp2kfp, asm: "VMINPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VMULPD128", argLength: 2, reg: fp21, asm: "VMULPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VSCALEFPD128", argLength: 2, reg: fp21, asm: "VSCALEFPD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VSCALEFPDMasked128", argLength: 3, reg: fp2kfp, asm: "VSCALEFPD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VMULPDMasked128", argLength: 3, reg: fp2kfp, asm: "VMULPD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VHADDPD128", argLength: 2, reg: fp21, asm: "VHADDPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VHSUBPD128", argLength: 2, reg: fp21, asm: "VHSUBPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VSQRTPD128", argLength: 1, reg: fp11, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: 
"VSQRTPDMasked128", argLength: 2, reg: fpkfp, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VSUBPD128", argLength: 2, reg: fp21, asm: "VSUBPD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VSUBPDMasked128", argLength: 3, reg: fp2kfp, asm: "VSUBPD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VADDPD256", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VADDPDMasked256", argLength: 3, reg: fp2kfp, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VADDSUBPD256", argLength: 2, reg: fp21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VRCP14PD256", argLength: 1, reg: fp11, asm: "VRCP14PD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VRSQRT14PD256", argLength: 1, reg: fp11, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VDIVPD256", argLength: 2, reg: fp21, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VFMADD213PD256", argLength: 3, reg: fp31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VFMADDSUB213PD256", argLength: 3, reg: fp31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VFMSUBADD213PD256", argLength: 3, reg: fp31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VADDPDMasked256", argLength: 3, reg: fp2kfp, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VRCP14PDMasked256", argLength: 2, reg: fpkfp, asm: "VRCP14PD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VRSQRT14PD256", argLength: 1, reg: fp11, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VRSQRT14PDMasked256", argLength: 2, reg: fpkfp, asm: "VRSQRT14PD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VDIVPD256", argLength: 2, reg: fp21, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VDIVPDMasked256", argLength: 3, reg: fp2kfp, asm: "VDIVPD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VFMADD213PD256", argLength: 3, reg: fp31, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VFMADD213PDMasked256", argLength: 4, reg: fp3kfp, asm: "VFMADD213PD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VFMADDSUB213PD256", argLength: 3, reg: fp31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VFMADDSUB213PDMasked256", argLength: 4, reg: fp3kfp, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VFMSUBADD213PD256", argLength: 3, reg: fp31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VFMSUBADD213PDMasked256", argLength: 4, reg: fp3kfp, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VMAXPDMasked256", argLength: 3, reg: fp2kfp, asm: "VMAXPD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VMINPDMasked256", argLength: 3, reg: fp2kfp, asm: "VMINPD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VMULPDMasked256", argLength: 3, reg: fp2kfp, asm: "VMULPD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VSCALEFPDMasked256", argLength: 3, reg: fp2kfp, asm: "VSCALEFPD", commutative: false, typ: "Vec256", 
resultInArg0: false}, - {name: "VSQRTPDMasked256", argLength: 2, reg: fpkfp, asm: "VSQRTPD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VSUBPDMasked256", argLength: 3, reg: fp2kfp, asm: "VSUBPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VMAXPD256", argLength: 2, reg: fp21, asm: "VMAXPD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VMAXPDMasked256", argLength: 3, reg: fp2kfp, asm: "VMAXPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VMINPD256", argLength: 2, reg: fp21, asm: "VMINPD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VMINPDMasked256", argLength: 3, reg: fp2kfp, asm: "VMINPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VMULPD256", argLength: 2, reg: fp21, asm: "VMULPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VSCALEFPD256", argLength: 2, reg: fp21, asm: "VSCALEFPD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VSCALEFPDMasked256", argLength: 3, reg: fp2kfp, asm: "VSCALEFPD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VMULPDMasked256", argLength: 3, reg: fp2kfp, asm: "VMULPD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VHADDPD256", argLength: 2, reg: fp21, asm: "VHADDPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VHSUBPD256", argLength: 2, reg: fp21, asm: "VHSUBPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VSQRTPD256", argLength: 1, reg: fp11, asm: "VSQRTPD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VSQRTPDMasked256", argLength: 2, reg: fpkfp, asm: "VSQRTPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VSUBPD256", argLength: 2, reg: fp21, asm: "VSUBPD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VSUBPDMasked256", argLength: 3, reg: fp2kfp, asm: "VSUBPD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VADDPD512", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VRCP14PD512", argLength: 1, reg: fp11, asm: "VRCP14PD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VRSQRT14PD512", argLength: 1, reg: fp11, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VDIVPD512", argLength: 2, reg: fp21, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VFMADD213PD512", argLength: 3, reg: fp31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VFMADDSUB213PD512", argLength: 3, reg: fp31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VFMSUBADD213PD512", argLength: 3, reg: fp31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VADDPDMasked512", argLength: 3, reg: fp2kfp, asm: "VADDPD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VRCP14PD512", argLength: 1, reg: fp11, asm: "VRCP14PD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VRCP14PDMasked512", argLength: 2, reg: fpkfp, asm: "VRCP14PD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VRSQRT14PD512", argLength: 1, reg: fp11, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VRSQRT14PDMasked512", argLength: 2, reg: fpkfp, asm: "VRSQRT14PD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VDIVPD512", argLength: 2, reg: fp21, asm: 
"VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VDIVPDMasked512", argLength: 3, reg: fp2kfp, asm: "VDIVPD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VFMADD213PD512", argLength: 3, reg: fp31, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VFMADD213PDMasked512", argLength: 4, reg: fp3kfp, asm: "VFMADD213PD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VFMADDSUB213PD512", argLength: 3, reg: fp31, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VFMADDSUB213PDMasked512", argLength: 4, reg: fp3kfp, asm: "VFMADDSUB213PD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VFMSUBADD213PD512", argLength: 3, reg: fp31, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VFMSUBADD213PDMasked512", argLength: 4, reg: fp3kfp, asm: "VFMSUBADD213PD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VMAXPDMasked512", argLength: 3, reg: fp2kfp, asm: "VMAXPD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VMINPDMasked512", argLength: 3, reg: fp2kfp, asm: "VMINPD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VMULPDMasked512", argLength: 3, reg: fp2kfp, asm: "VMULPD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VSCALEFPDMasked512", argLength: 3, reg: fp2kfp, asm: "VSCALEFPD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VSQRTPDMasked512", argLength: 2, reg: fpkfp, asm: "VSQRTPD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VSUBPDMasked512", argLength: 3, reg: fp2kfp, asm: "VSUBPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VMAXPD512", argLength: 2, reg: fp21, asm: "VMAXPD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VMAXPDMasked512", argLength: 3, reg: fp2kfp, asm: "VMAXPD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VMINPD512", argLength: 2, reg: fp21, asm: "VMINPD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VMINPDMasked512", argLength: 3, reg: fp2kfp, asm: "VMINPD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VMULPD512", argLength: 2, reg: fp21, asm: "VMULPD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VSCALEFPD512", argLength: 2, reg: fp21, asm: "VSCALEFPD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VSCALEFPDMasked512", argLength: 3, reg: fp2kfp, asm: "VSCALEFPD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VMULPDMasked512", argLength: 3, reg: fp2kfp, asm: "VMULPD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VSQRTPD512", argLength: 1, reg: fp11, asm: "VSQRTPD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VSQRTPDMasked512", argLength: 2, reg: fpkfp, asm: "VSQRTPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VSUBPD512", argLength: 2, reg: fp21, asm: "VSUBPD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VSUBPDMasked512", argLength: 3, reg: fp2kfp, asm: "VSUBPD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPABSW256", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPABSWMasked256", argLength: 2, reg: fpkfp, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDW256", argLength: 2, reg: fp21, asm: "VPADDW", 
commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPADDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPCMPEQW256", argLength: 2, reg: fp21, asm: "VPCMPEQW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPCMPGTW256", argLength: 2, reg: fp21, asm: "VPCMPGTW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPABSWMasked256", argLength: 2, reg: fpkfp, asm: "VPABSW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPADDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMAXSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXSW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMINSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMULHWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMULLWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMADDWDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPOPCNTWMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPADDSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPSUBSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSLLVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHLDVWMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHRDVWMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRAVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSUBWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSW256", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMAXSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSW256", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMINSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHW256", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMULHWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULHW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULLW256", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMULLWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULLW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMADDWD256", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: 
"Vec256", resultInArg0: false}, + {name: "VPMADDWDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMADDWD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHADDW256", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHSUBW256", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTW256", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPOPCNTWMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDSW256", argLength: 2, reg: fp21, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPADDSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPHADDSW256", argLength: 2, reg: fp21, asm: "VPHADDSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHSUBSW256", argLength: 2, reg: fp21, asm: "VPHSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSW256", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSUBSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLW256", argLength: 2, reg: fp21, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLW256", argLength: 2, reg: fp21, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAW256", argLength: 2, reg: fp21, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVW256", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVW256", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSHLDVWMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSLLVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLVW256", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVW256", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSHRDVWMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVW256", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGNW256", argLength: 2, reg: fp21, asm: "VPSIGNW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBW256", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSUBWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPABSW512", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPADDW512", argLength: 2, reg: fp21, asm: "VPADDW", commutative: true, typ: 
"Vec512", resultInArg0: false}, {name: "VPABSWMasked512", argLength: 2, reg: fpkfp, asm: "VPABSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPADDW512", argLength: 2, reg: fp21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPADDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMAXSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMINSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMULHWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMULLWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMADDWDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPOPCNTWMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPADDSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPSUBSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSLLVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHLDVWMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHRDVWMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRAVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSUBWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSW512", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMAXSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSW512", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMINSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHW512", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMULHWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULHW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULLW512", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMULLWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULLW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMADDWD512", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMADDWDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMADDWD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTW512", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: 
false}, + {name: "VPOPCNTWMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDSW512", argLength: 2, reg: fp21, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPADDSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSW512", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSUBSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLVW512", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVW512", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSHLDVWMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRLVW512", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVW512", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSHRDVWMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVW512", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBW512", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSUBWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPABSW128", argLength: 1, reg: fp11, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPABSWMasked128", argLength: 2, reg: fpkfp, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPADDW128", argLength: 2, reg: fp21, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPADDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPCMPEQW128", argLength: 2, reg: fp21, asm: "VPCMPEQW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPCMPGTW128", argLength: 2, reg: fp21, asm: "VPCMPGTW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPABSWMasked128", argLength: 2, reg: fpkfp, asm: "VPABSW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPADDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXSW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMINSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMULHWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMULLWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULLW", 
commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMADDWDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPOPCNTWMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPADDSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPSUBSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSLLVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHLDVWMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHRDVWMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRAVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSUBWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSW128", argLength: 2, reg: fp21, asm: "VPMAXSW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMAXSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSW128", argLength: 2, reg: fp21, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHW128", argLength: 2, reg: fp21, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMULHWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULHW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULLW128", argLength: 2, reg: fp21, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMULLWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULLW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMADDWD128", argLength: 2, reg: fp21, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMADDWDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMADDWD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHADDW128", argLength: 2, reg: fp21, asm: "VPHADDW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHSUBW128", argLength: 2, reg: fp21, asm: "VPHSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTW128", argLength: 1, reg: fp11, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPOPCNTWMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPADDSW128", argLength: 2, reg: fp21, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPADDSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDSW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPHADDSW128", argLength: 2, reg: fp21, asm: "VPHADDSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHSUBSW128", argLength: 2, reg: fp21, asm: "VPHSUBSW", commutative: false, typ: "Vec128", resultInArg0: 
false}, {name: "VPSUBSW128", argLength: 2, reg: fp21, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSUBSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLW128", argLength: 2, reg: fp21, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRLW128", argLength: 2, reg: fp21, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAW128", argLength: 2, reg: fp21, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVW128", argLength: 2, reg: fp21, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVW128", argLength: 3, reg: fp31, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSHLDVWMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSLLVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRLVW128", argLength: 2, reg: fp21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVW128", argLength: 3, reg: fp31, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSHRDVWMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVW128", argLength: 2, reg: fp21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGNW128", argLength: 2, reg: fp21, asm: "VPSIGNW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBW128", argLength: 2, reg: fp21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSUBWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPABSD512", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPADDD512", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPANDD512", argLength: 2, reg: fp21, asm: "VPANDD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPANDND512", argLength: 2, reg: fp21, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPABSDMasked512", argLength: 2, reg: fpkfp, asm: "VPABSD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPADDD512", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPADDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPANDD512", argLength: 2, reg: fp21, asm: "VPANDD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPANDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPANDD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPANDND512", argLength: 2, reg: fp21, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPANDNDMasked512", argLength: 3, reg: fp2kfp, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: 
"VPMAXSDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMINSDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMULLDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPORDMasked512", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPDPWSSDMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPOPCNTDMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPROLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPRORVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPDPWSSDSMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPDPBUSDSMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSLLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHLDVDMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHRDVDMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRAVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSUBDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPDPBUSDMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPXORDMasked512", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSD512", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMAXSDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSD512", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMINSDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULLD512", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMULLDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULLD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPORD512", argLength: 2, reg: fp21, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPORDMasked512", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSD512", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPDPWSSDMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: 
"VPOPCNTD512", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPOPCNTDMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPROLVD512", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPRORVD512", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPWSSDS512", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPDPWSSDSMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDS512", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPDPBUSDSMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSLLVD512", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVD512", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSHLDVDMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRLVD512", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVD512", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSHRDVDMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVD512", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBD512", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSUBDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPDPBUSD512", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPDPBUSDMasked512", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPXORD512", argLength: 2, reg: fp21, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPXORDMasked512", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPABSD128", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPADDD128", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPCMPEQD128", argLength: 2, reg: fp21, asm: "VPCMPEQD", commutative: true, typ: 
"Vec128", resultInArg0: false}, - {name: "VPCMPGTD128", argLength: 2, reg: fp21, asm: "VPCMPGTD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPABSDMasked128", argLength: 2, reg: fpkfp, asm: "VPABSD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPADDD128", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPADDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPANDD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDNDMasked128", argLength: 3, reg: fp2kfp, asm: "VPANDND", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXSDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXSD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMINSDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINSD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMULLDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPORDMasked128", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPDPWSSDMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPOPCNTDMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPROLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPRORVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPDPWSSDSMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPDPBUSDSMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSLLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHLDVDMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHRDVDMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRAVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSUBDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPDPBUSDMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPXORDMasked128", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPCMPEQD128", argLength: 2, reg: fp21, asm: "VPCMPEQD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPCMPGTD128", argLength: 2, reg: fp21, asm: "VPCMPGTD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSD128", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMAXSDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXSD", commutative: true, typ: 
"Vec128", resultInArg0: false}, {name: "VPMINSD128", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINSDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINSD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULDQ128", argLength: 2, reg: fp21, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULLD128", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMULLDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULLD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPORDMasked128", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPDPWSSD128", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPDPWSSDMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPHADDD128", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPHSUBD128", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTD128", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPOPCNTDMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPROLVD128", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPRORVD128", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPDPWSSDS128", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPDPWSSDSMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPDPBUSDS128", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPDPBUSDSMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLD128", argLength: 2, reg: fp21, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRLD128", argLength: 2, reg: fp21, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAD128", argLength: 2, reg: fp21, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVD128", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVD128", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSHLDVDMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSLLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRLVD128", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVD128", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: 
false, typ: "Vec128", resultInArg0: true}, + {name: "VPSHRDVDMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVD128", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGND128", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBD128", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSUBDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPDPBUSD128", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPDPBUSDMasked128", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPXORDMasked128", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPABSD256", argLength: 1, reg: fp11, asm: "VPABSD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPADDD256", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPCMPEQD256", argLength: 2, reg: fp21, asm: "VPCMPEQD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPCMPGTD256", argLength: 2, reg: fp21, asm: "VPCMPGTD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPABSDMasked256", argLength: 2, reg: fpkfp, asm: "VPABSD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPADDD256", argLength: 2, reg: fp21, asm: "VPADDD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPADDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPANDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPANDD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPANDNDMasked256", argLength: 3, reg: fp2kfp, asm: "VPANDND", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMAXSDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXSD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMINSDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINSD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMULLDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPORDMasked256", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPDPWSSDMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPOPCNTDMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPROLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPRORVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPRORVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPDPWSSDSMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: 
"VPDPBUSDSMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSLLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHLDVDMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHRDVDMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRAVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSUBDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPDPBUSDMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPXORDMasked256", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPEQD256", argLength: 2, reg: fp21, asm: "VPCMPEQD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPGTD256", argLength: 2, reg: fp21, asm: "VPCMPGTD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSD256", argLength: 2, reg: fp21, asm: "VPMAXSD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMAXSDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXSD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSD256", argLength: 2, reg: fp21, asm: "VPMINSD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMINSDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINSD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULDQ256", argLength: 2, reg: fp21, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULLD256", argLength: 2, reg: fp21, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMULLDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULLD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPORDMasked256", argLength: 3, reg: fp2kfp, asm: "VPORD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPDPWSSD256", argLength: 3, reg: fp31, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPDPWSSDMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPHADDD256", argLength: 2, reg: fp21, asm: "VPHADDD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPHSUBD256", argLength: 2, reg: fp21, asm: "VPHSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTD256", argLength: 1, reg: fp11, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPOPCNTDMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPROLVD256", argLength: 2, reg: fp21, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPROLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPRORVD256", argLength: 2, reg: fp21, asm: "VPRORVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVDMasked256", argLength: 3, reg: fp2kfp, asm: 
"VPRORVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPWSSDS256", argLength: 3, reg: fp31, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPDPWSSDSMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPBUSDS256", argLength: 3, reg: fp31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPDPBUSDSMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLD256", argLength: 2, reg: fp21, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLD256", argLength: 2, reg: fp21, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAD256", argLength: 2, reg: fp21, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVD256", argLength: 2, reg: fp21, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVD256", argLength: 3, reg: fp31, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSHLDVDMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSLLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLVD256", argLength: 2, reg: fp21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVD256", argLength: 3, reg: fp31, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSHRDVDMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVD256", argLength: 2, reg: fp21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGND256", argLength: 2, reg: fp21, asm: "VPSIGND", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBD256", argLength: 2, reg: fp21, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSUBDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPDPBUSD256", argLength: 3, reg: fp31, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPDPBUSDMasked256", argLength: 4, reg: fp3kfp, asm: "VPDPBUSD", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPXORDMasked256", argLength: 3, reg: fp2kfp, asm: "VPXORD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPABSQ128", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPADDQ128", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPCMPEQQ128", argLength: 2, reg: fp21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPABSQMasked128", argLength: 2, reg: fpkfp, asm: "VPABSQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPADDQ128", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPADDQMasked128", argLength: 3, reg: 
fp2kfp, asm: "VPADDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPANDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDNQMasked128", argLength: 3, reg: fp2kfp, asm: "VPANDNQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPCMPEQQ128", argLength: 2, reg: fp21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMAXSQ128", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINSQ128", argLength: 2, reg: fp21, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULDQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMULLQ128", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPORQMasked128", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPOPCNTQMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPROLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPRORVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSLLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRAQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSLLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHLDVQMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHRDVQMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRAVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSUBQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPXORQMasked128", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXSQ128", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMINSQ128", argLength: 2, reg: fp21, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMULLQ128", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTQ128", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, 
typ: "Vec128", resultInArg0: false}, + {name: "VPOPCNTQMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPROLVQ128", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPRORVQ128", argLength: 2, reg: fp21, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLQ128", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRLQ128", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAQ128", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVQ128", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVQ128", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSHLDVQMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSLLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRLVQ128", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVQ128", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSHRDVQMasked128", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, + {name: "VPSRLVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVQ128", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBQ128", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSUBQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPXORQMasked128", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPABSQ256", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPADDQ256", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPCMPEQQ256", argLength: 2, reg: fp21, asm: "VPCMPEQQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPCMPGTQ256", argLength: 2, reg: fp21, asm: "VPCMPGTQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPABSQMasked256", argLength: 2, reg: fpkfp, asm: 
"VPABSQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPADDQ256", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPADDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPANDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPANDQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPANDNQMasked256", argLength: 3, reg: fp2kfp, asm: "VPANDNQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPEQQ256", argLength: 2, reg: fp21, asm: "VPCMPEQQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPGTQ256", argLength: 2, reg: fp21, asm: "VPCMPGTQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMAXSQ256", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXSQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMINSQ256", argLength: 2, reg: fp21, asm: "VPMINSQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINSQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULDQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMULLQ256", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPORQMasked256", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPOPCNTQMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPROLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPRORVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSLLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRAQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSLLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHLDVQMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHRDVQMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRAVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSUBQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPXORQMasked256", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMAXSQ256", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: 
"Vec256", resultInArg0: false}, - {name: "VPMINSQ256", argLength: 2, reg: fp21, asm: "VPMINSQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMULLQ256", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTQ256", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPOPCNTQMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPROLVQ256", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPRORVQ256", argLength: 2, reg: fp21, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLQ256", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLQ256", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAQ256", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVQ256", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVQ256", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSHLDVQMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSLLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRLVQ256", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVQ256", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSHRDVQMasked256", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, + {name: "VPSRLVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVQ256", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQ256", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPABSQ512", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPADDQ512", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPANDQ512", argLength: 2, reg: fp21, asm: "VPANDQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPANDNQ512", argLength: 2, reg: fp21, asm: "VPANDNQ", commutative: false, 
typ: "Vec512", resultInArg0: false}, - {name: "VPABSQMasked512", argLength: 2, reg: fpkfp, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPADDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPANDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPANDQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPANDNQMasked512", argLength: 3, reg: fp2kfp, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPMAXSQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMINSQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINSQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMULDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULDQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMULLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPORQMasked512", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPOPCNTQMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPROLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPRORVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSLLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRAQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSLLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHLDVQMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHRDVQMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRAVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSUBQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPXORQMasked512", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPSUBQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPXORQMasked256", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPABSQ512", argLength: 1, reg: fp11, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPABSQMasked512", argLength: 2, reg: fpkfp, asm: "VPABSQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPADDQ512", argLength: 2, reg: fp21, asm: "VPADDQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPADDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDQ", commutative: true, 
typ: "Vec512", resultInArg0: false}, + {name: "VPANDQ512", argLength: 2, reg: fp21, asm: "VPANDQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPANDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPANDQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPANDNQ512", argLength: 2, reg: fp21, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPANDNQMasked512", argLength: 3, reg: fp2kfp, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSQ512", argLength: 2, reg: fp21, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMAXSQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINSQ512", argLength: 2, reg: fp21, asm: "VPMINSQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMINSQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINSQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULDQ512", argLength: 2, reg: fp21, asm: "VPMULDQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMULDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULDQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULLQ512", argLength: 2, reg: fp21, asm: "VPMULLQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMULLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULLQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPORQ512", argLength: 2, reg: fp21, asm: "VPORQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPORQMasked512", argLength: 3, reg: fp2kfp, asm: "VPORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTQ512", argLength: 1, reg: fp11, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPOPCNTQMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPROLVQ512", argLength: 2, reg: fp21, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPROLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPRORVQ512", argLength: 2, reg: fp21, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLQ512", argLength: 2, reg: fp21, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRLQ512", argLength: 2, reg: fp21, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAQ512", argLength: 2, reg: fp21, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLVQ512", argLength: 2, reg: fp21, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVQ512", argLength: 3, reg: fp31, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSHLDVQMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHLDVQ", 
commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRLVQ512", argLength: 2, reg: fp21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVQ512", argLength: 3, reg: fp31, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSHRDVQMasked512", argLength: 4, reg: fp3kfp, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSRLVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVQ512", argLength: 2, reg: fp21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQ512", argLength: 2, reg: fp21, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSUBQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPXORQ512", argLength: 2, reg: fp21, asm: "VPXORQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPXORQMasked512", argLength: 3, reg: fp2kfp, asm: "VPXORQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPABSB128", argLength: 1, reg: fp11, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPABSBMasked128", argLength: 2, reg: fpkfp, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPADDB128", argLength: 2, reg: fp21, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPADDBMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAND128", argLength: 2, reg: fp21, asm: "VPAND", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDN128", argLength: 2, reg: fp21, asm: "VPANDN", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPEQB128", argLength: 2, reg: fp21, asm: "VPCMPEQB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPCMPGTB128", argLength: 2, reg: fp21, asm: "VPCMPGTB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPABSBMasked128", argLength: 2, reg: fpkfp, asm: "VPABSB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPADDBMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDB", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXSBMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXSB", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMINSBMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINSB", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPOPCNTBMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPADDSBMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDSB", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPSUBSBMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBSB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSUBBMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSB128", argLength: 2, reg: fp21, asm: "VPMAXSB", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMAXSBMasked128", argLength: 
3, reg: fp2kfp, asm: "VPMAXSB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSB128", argLength: 2, reg: fp21, asm: "VPMINSB", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINSBMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINSB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPOR128", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPOPCNTB128", argLength: 1, reg: fp11, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPOPCNTBMasked128", argLength: 2, reg: fpkfp, asm: "VPOPCNTB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPADDSB128", argLength: 2, reg: fp21, asm: "VPADDSB", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPADDSBMasked128", argLength: 3, reg: fp2kfp, asm: "VPADDSB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSB128", argLength: 2, reg: fp21, asm: "VPSUBSB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSUBSBMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBSB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGNB128", argLength: 2, reg: fp21, asm: "VPSIGNB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBB128", argLength: 2, reg: fp21, asm: "VPSUBB", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSUBBMasked128", argLength: 3, reg: fp2kfp, asm: "VPSUBB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPXOR128", argLength: 2, reg: fp21, asm: "VPXOR", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPABSB256", argLength: 1, reg: fp11, asm: "VPABSB", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPABSBMasked256", argLength: 2, reg: fpkfp, asm: "VPABSB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDB256", argLength: 2, reg: fp21, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPADDBMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPAND256", argLength: 2, reg: fp21, asm: "VPAND", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPANDN256", argLength: 2, reg: fp21, asm: "VPANDN", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPEQB256", argLength: 2, reg: fp21, asm: "VPCMPEQB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPCMPGTB256", argLength: 2, reg: fp21, asm: "VPCMPGTB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPABSBMasked256", argLength: 2, reg: fpkfp, asm: "VPABSB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPADDBMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDB", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMAXSBMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXSB", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMINSBMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINSB", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPOPCNTBMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPADDSBMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDSB", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPSUBSBMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBSB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSUBBMasked256", 
argLength: 3, reg: fp2kfp, asm: "VPSUBB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXSB256", argLength: 2, reg: fp21, asm: "VPMAXSB", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMAXSBMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXSB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINSB256", argLength: 2, reg: fp21, asm: "VPMINSB", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMINSBMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINSB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPOR256", argLength: 2, reg: fp21, asm: "VPOR", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPOPCNTB256", argLength: 1, reg: fp11, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPOPCNTBMasked256", argLength: 2, reg: fpkfp, asm: "VPOPCNTB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPADDSB256", argLength: 2, reg: fp21, asm: "VPADDSB", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPADDSBMasked256", argLength: 3, reg: fp2kfp, asm: "VPADDSB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSB256", argLength: 2, reg: fp21, asm: "VPSUBSB", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSUBSBMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBSB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGNB256", argLength: 2, reg: fp21, asm: "VPSIGNB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBB256", argLength: 2, reg: fp21, asm: "VPSUBB", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSUBBMasked256", argLength: 3, reg: fp2kfp, asm: "VPSUBB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPXOR256", argLength: 2, reg: fp21, asm: "VPXOR", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPABSB512", argLength: 1, reg: fp11, asm: "VPABSB", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPADDB512", argLength: 2, reg: fp21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPABSBMasked512", argLength: 2, reg: fpkfp, asm: "VPABSB", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPADDB512", argLength: 2, reg: fp21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPADDBMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMAXSBMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMINSBMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINSB", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPOPCNTBMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPADDSBMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDSB", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPSUBSBMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBSB", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSUBBMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBB", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXSB512", argLength: 2, reg: fp21, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMAXSBMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: 
"VPMINSB512", argLength: 2, reg: fp21, asm: "VPMINSB", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMINSBMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINSB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPOPCNTB512", argLength: 1, reg: fp11, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPOPCNTBMasked512", argLength: 2, reg: fpkfp, asm: "VPOPCNTB", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPADDSB512", argLength: 2, reg: fp21, asm: "VPADDSB", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPADDSBMasked512", argLength: 3, reg: fp2kfp, asm: "VPADDSB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSB512", argLength: 2, reg: fp21, asm: "VPSUBSB", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSUBSBMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBSB", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBB512", argLength: 2, reg: fp21, asm: "VPSUBB", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSUBBMasked512", argLength: 3, reg: fp2kfp, asm: "VPSUBB", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPAVGW256", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPAVGWMasked256", argLength: 3, reg: fp2kfp, asm: "VPAVGW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMAXUWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMINUWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMULHUWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUW256", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMAXUWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUW256", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMINUWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUW256", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMULHUWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPAVGW512", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPAVGWMasked512", argLength: 3, reg: fp2kfp, asm: "VPAVGW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMAXUWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMINUWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMULHUWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUW512", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMAXUWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUW512", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: 
"Vec512", resultInArg0: false}, + {name: "VPMINUWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUW512", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMULHUWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPAVGW128", argLength: 2, reg: fp21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGWMasked128", argLength: 3, reg: fp2kfp, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXUWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMINUWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMULHUWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUW128", argLength: 2, reg: fp21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMAXUWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUW128", argLength: 2, reg: fp21, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINUWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUW128", argLength: 2, reg: fp21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXUDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMINUDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMULHUWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUD512", argLength: 2, reg: fp21, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMAXUDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUD512", argLength: 2, reg: fp21, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMAXUDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMINUDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINUDMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUD128", argLength: 2, reg: fp21, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMAXUDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUD128", argLength: 2, reg: fp21, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINUDMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULUDQ128", argLength: 2, reg: fp21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXUDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMINUDMasked256", 
argLength: 3, reg: fp2kfp, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUD256", argLength: 2, reg: fp21, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMAXUDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUD256", argLength: 2, reg: fp21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMINUDMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULUDQ256", argLength: 2, reg: fp21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMAXUQ128", argLength: 2, reg: fp21, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINUQ128", argLength: 2, reg: fp21, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULUDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXUQ128", argLength: 2, reg: fp21, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMINUQ128", argLength: 2, reg: fp21, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMAXUQ256", argLength: 2, reg: fp21, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMINUQ256", argLength: 2, reg: fp21, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULUDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMAXUQ256", argLength: 2, reg: fp21, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMINUQ256", argLength: 2, reg: fp21, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMAXUQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMINUQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMULUDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUQ512", argLength: 2, reg: fp21, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMAXUQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUQ512", argLength: 2, reg: fp21, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMINUQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULUDQ512", argLength: 2, reg: fp21, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMULUDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: 
false}, {name: "VPAVGB128", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VGF2P8MULB128", argLength: 2, reg: fp21, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPAVGBMasked128", argLength: 3, reg: fp2kfp, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VGF2P8MULB128", argLength: 2, reg: fp21, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VGF2P8MULBMasked128", argLength: 3, reg: fp2kfp, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPMAXUBMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMINUBMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VPMADDUBSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUB128", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMAXUBMasked128", argLength: 3, reg: fp2kfp, asm: "VPMAXUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUB128", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPMINUBMasked128", argLength: 3, reg: fp2kfp, asm: "VPMINUB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMADDUBSW128", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPMADDUBSWMasked128", argLength: 3, reg: fp2kfp, asm: "VPMADDUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPAVGB256", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VGF2P8MULB256", argLength: 2, reg: fp21, asm: "VGF2P8MULB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPAVGBMasked256", argLength: 3, reg: fp2kfp, asm: "VPAVGB", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VGF2P8MULB256", argLength: 2, reg: fp21, asm: "VGF2P8MULB", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VGF2P8MULBMasked256", argLength: 3, reg: fp2kfp, asm: "VGF2P8MULB", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPMAXUBMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMINUBMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VPMADDUBSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUB256", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMAXUBMasked256", argLength: 3, reg: fp2kfp, asm: "VPMAXUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUB256", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPMINUBMasked256", argLength: 3, reg: fp2kfp, asm: "VPMINUB", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMADDUBSW256", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPMADDUBSWMasked256", argLength: 3, reg: fp2kfp, asm: "VPMADDUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: 
"VPAVGB512", argLength: 2, reg: fp21, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VGF2P8MULB512", argLength: 2, reg: fp21, asm: "VGF2P8MULB", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPAVGBMasked512", argLength: 3, reg: fp2kfp, asm: "VPAVGB", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VGF2P8MULB512", argLength: 2, reg: fp21, asm: "VGF2P8MULB", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VGF2P8MULBMasked512", argLength: 3, reg: fp2kfp, asm: "VGF2P8MULB", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPMAXUBMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMINUBMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false}, - {name: "VPMADDUBSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUB512", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMAXUBMasked512", argLength: 3, reg: fp2kfp, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUB512", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPMINUBMasked512", argLength: 3, reg: fp2kfp, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMADDUBSW512", argLength: 2, reg: fp21, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPMADDUBSWMasked512", argLength: 3, reg: fp2kfp, asm: "VPMADDUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VRNDSCALEPS512", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VREDUCEPS512", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VCMPPS512", argLength: 2, reg: fp2k, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VRNDSCALEPSMasked512", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VREDUCEPS512", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VREDUCEPSMasked512", argLength: 2, reg: fpkfp, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VCMPPS512", argLength: 2, reg: fp2k, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VCMPPSMasked512", argLength: 3, reg: fp2kk, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VROUNDPS128", argLength: 1, reg: fp11, asm: "VROUNDPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRNDSCALEPS128", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VREDUCEPS128", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VCMPPS128", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VRNDSCALEPSMasked128", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + 
{name: "VREDUCEPS128", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VREDUCEPSMasked128", argLength: 2, reg: fpkfp, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VCMPPS128", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VCMPPSMasked128", argLength: 3, reg: fp2kk, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VROUNDPS256", argLength: 1, reg: fp11, asm: "VROUNDPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VRNDSCALEPS256", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VREDUCEPS256", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VCMPPS256", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false}, - {name: "VEXTRACTF128128", argLength: 1, reg: fp11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRNDSCALEPSMasked256", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VREDUCEPS256", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VREDUCEPSMasked256", argLength: 2, reg: fpkfp, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VCMPPS256", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VCMPPSMasked256", argLength: 3, reg: fp2kk, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VEXTRACTF128128", argLength: 1, reg: fp11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VINSERTF128256", argLength: 2, reg: fp21, asm: "VINSERTF128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VROUNDPD128", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VRNDSCALEPD128", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VRNDSCALEPDMasked128", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VREDUCEPD128", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VREDUCEPDMasked128", argLength: 2, reg: fpkfp, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VDPPD128", argLength: 2, reg: fp21, asm: "VDPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VCMPPD128", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false}, - {name: "VRNDSCALEPDMasked128", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VREDUCEPDMasked128", argLength: 2, reg: fpkfp, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VCMPPDMasked128", argLength: 3, reg: fp2kk, asm: "VCMPPD", aux: "Int8", commutative: 
true, typ: "Mask", resultInArg0: false}, {name: "VROUNDPD256", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VRNDSCALEPD256", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VREDUCEPD256", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VCMPPD256", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VRNDSCALEPDMasked256", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VREDUCEPD256", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VREDUCEPDMasked256", argLength: 2, reg: fpkfp, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VCMPPD256", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VCMPPDMasked256", argLength: 3, reg: fp2kk, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VRNDSCALEPD512", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VREDUCEPD512", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VCMPPD512", argLength: 2, reg: fp2k, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VRNDSCALEPDMasked512", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VREDUCEPD512", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VREDUCEPDMasked512", argLength: 2, reg: fpkfp, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VCMPPD512", argLength: 2, reg: fp2k, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VCMPPDMasked512", argLength: 3, reg: fp2kk, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPCMPW256", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPSHLDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHRDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPW256", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPSHLDW256", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDW256", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDWMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", 
commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPW512", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPSHLDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHRDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDW512", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDW512", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDWMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPCMPWMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPEXTRW128", argLength: 1, reg: fpgp, asm: "VPEXTRW", aux: "Int8", commutative: false, typ: "int16", resultInArg0: false}, {name: "VPCMPW128", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPWMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPSHLDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHRDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRW128", argLength: 2, reg: fpgpfp, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDW128", argLength: 2, reg: fp21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDW128", argLength: 2, reg: fp21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHRDWMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPROLDMasked512", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPRORDMasked512", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHLDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHRDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPROLD512", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", 
commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPROLDMasked512", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPRORD512", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPRORDMasked512", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDD512", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHLDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDD512", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSHRDDMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPCMPDMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPEXTRD128", argLength: 1, reg: fpgp, asm: "VPEXTRD", aux: "Int8", commutative: false, typ: "int32", resultInArg0: false}, {name: "VPCMPD128", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPDMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPROLDMasked128", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPRORDMasked128", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHLDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHRDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPROLD128", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLDMasked128", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPRORD128", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORDMasked128", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRD128", argLength: 2, reg: fpgpfp, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDD128", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDD128", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPCMPD256", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, + {name: "VPSHRDDMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPDMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, 
typ: "Mask", resultInArg0: false}, - {name: "VPROLDMasked256", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPRORDMasked256", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHLDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHRDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPD256", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPROLD256", argLength: 1, reg: fp11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLDMasked256", argLength: 2, reg: fpkfp, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPRORD256", argLength: 1, reg: fp11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORDMasked256", argLength: 2, reg: fpkfp, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDD256", argLength: 2, reg: fp21, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDD256", argLength: 2, reg: fp21, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDDMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPQMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPEXTRQ128", argLength: 1, reg: fpgp, asm: "VPEXTRQ", aux: "Int8", commutative: false, typ: "int64", resultInArg0: false}, {name: "VPCMPQ128", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPQMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPROLQMasked128", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPRORQMasked128", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHLDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSHRDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPROLQ128", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPROLQMasked128", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPRORQ128", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPRORQMasked128", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRQ128", argLength: 2, reg: fpgpfp, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: 
"Vec128", resultInArg0: false}, {name: "VPSHLDQ128", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSHLDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDQ128", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPCMPQ256", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, + {name: "VPSHRDQMasked128", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPQMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPROLQMasked256", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPRORQMasked256", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHLDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSHRDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPQ256", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPROLQ256", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPROLQMasked256", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPRORQ256", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPRORQMasked256", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDQ256", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHLDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDQ256", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSHRDQMasked256", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPROLQMasked512", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPRORQMasked512", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHLDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSHRDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPROLQ512", argLength: 1, reg: fp11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", 
+ {name: "VPROLQMasked512", argLength: 2, reg: fpkfp, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 {name: "VPRORQ512", argLength: 1, reg: fp11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPRORQMasked512", argLength: 2, reg: fpkfp, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 {name: "VPSHLDQ512", argLength: 2, reg: fp21, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPSHLDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 {name: "VPSHRDQ512", argLength: 2, reg: fp21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPSHRDQMasked512", argLength: 3, reg: fp2kfp, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VPEXTRB128", argLength: 1, reg: fpgp, asm: "VPEXTRB", aux: "Int8", commutative: false, typ: "int8", resultInArg0: false},
 {name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VPCMPBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VEXTRACTI128128", argLength: 1, reg: fp11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
 {name: "VPCMPB256", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VINSERTI128256", argLength: 2, reg: fp21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 {name: "VPCMPB512", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VPCMPBMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
@@ -856,22 +856,22 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
 {name: "VPCMPUQ512", argLength: 2, reg: fp2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VPCMPUQMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VPCMPUB128", argLength: 2, reg: fp2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VGF2P8AFFINEQB128", argLength: 2, reg: fp21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
 {name: "VGF2P8AFFINEINVQB128", argLength: 2, reg: fp21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
- {name: "VPCMPUBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
 {name: "VGF2P8AFFINEINVQBMasked128", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
 {name: "VPCMPUB256", argLength: 2, reg: fp2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VGF2P8AFFINEQB256", argLength: 2, reg: fp21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 {name: "VGF2P8AFFINEINVQB256", argLength: 2, reg: fp21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
- {name: "VPCMPUBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 {name: "VGF2P8AFFINEINVQBMasked256", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
 {name: "VPCMPUB512", argLength: 2, reg: fp2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUBMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
 {name: "VGF2P8AFFINEQB512", argLength: 2, reg: fp21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 {name: "VGF2P8AFFINEINVQB512", argLength: 2, reg: fp21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
- {name: "VPCMPUBMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 {name: "VGF2P8AFFINEINVQBMasked512", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: fp2kfp, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
 	}
 }
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index 54c247eab1..1079321da7 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -4,1681 +4,1681 @@ package main
 func simdGenericOps() []opData {
 	return []opData{
 {name: "AddFloat32x16", argLength: 2, commutative: true},
+ {name: "AddMaskedFloat32x16", argLength: 3, commutative: true},
 {name: "ApproximateReciprocalFloat32x16", argLength: 1, commutative: false},
+ {name: "ApproximateReciprocalMaskedFloat32x16", argLength: 
2, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat32x16", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalOfSqrtMaskedFloat32x16", argLength: 2, commutative: false}, {name: "DivFloat32x16", argLength: 2, commutative: false}, + {name: "DivMaskedFloat32x16", argLength: 3, commutative: false}, {name: "EqualFloat32x16", argLength: 2, commutative: true}, + {name: "EqualMaskedFloat32x16", argLength: 3, commutative: true}, {name: "FusedMultiplyAddFloat32x16", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddMaskedFloat32x16", argLength: 4, commutative: false}, {name: "FusedMultiplyAddSubFloat32x16", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddSubMaskedFloat32x16", argLength: 4, commutative: false}, {name: "FusedMultiplySubAddFloat32x16", argLength: 3, commutative: false}, + {name: "FusedMultiplySubAddMaskedFloat32x16", argLength: 4, commutative: false}, {name: "GreaterFloat32x16", argLength: 2, commutative: false}, {name: "GreaterEqualFloat32x16", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedFloat32x16", argLength: 3, commutative: false}, + {name: "GreaterMaskedFloat32x16", argLength: 3, commutative: false}, {name: "IsNanFloat32x16", argLength: 2, commutative: true}, + {name: "IsNanMaskedFloat32x16", argLength: 3, commutative: true}, {name: "LessFloat32x16", argLength: 2, commutative: false}, {name: "LessEqualFloat32x16", argLength: 2, commutative: false}, - {name: "MaskedAddFloat32x16", argLength: 3, commutative: true}, - {name: "MaskedApproximateReciprocalFloat32x16", argLength: 2, commutative: false}, - {name: "MaskedApproximateReciprocalOfSqrtFloat32x16", argLength: 2, commutative: false}, - {name: "MaskedDivFloat32x16", argLength: 3, commutative: false}, - {name: "MaskedEqualFloat32x16", argLength: 3, commutative: true}, - {name: "MaskedFusedMultiplyAddFloat32x16", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplyAddSubFloat32x16", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplySubAddFloat32x16", argLength: 4, commutative: false}, - {name: "MaskedGreaterFloat32x16", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualFloat32x16", argLength: 3, commutative: false}, - {name: "MaskedIsNanFloat32x16", argLength: 3, commutative: true}, - {name: "MaskedLessFloat32x16", argLength: 3, commutative: false}, - {name: "MaskedLessEqualFloat32x16", argLength: 3, commutative: false}, - {name: "MaskedMaxFloat32x16", argLength: 3, commutative: true}, - {name: "MaskedMinFloat32x16", argLength: 3, commutative: true}, - {name: "MaskedMulFloat32x16", argLength: 3, commutative: true}, - {name: "MaskedMulByPowOf2Float32x16", argLength: 3, commutative: false}, - {name: "MaskedNotEqualFloat32x16", argLength: 3, commutative: true}, - {name: "MaskedSqrtFloat32x16", argLength: 2, commutative: false}, - {name: "MaskedSubFloat32x16", argLength: 3, commutative: false}, + {name: "LessEqualMaskedFloat32x16", argLength: 3, commutative: false}, + {name: "LessMaskedFloat32x16", argLength: 3, commutative: false}, {name: "MaxFloat32x16", argLength: 2, commutative: true}, + {name: "MaxMaskedFloat32x16", argLength: 3, commutative: true}, {name: "MinFloat32x16", argLength: 2, commutative: true}, + {name: "MinMaskedFloat32x16", argLength: 3, commutative: true}, {name: "MulFloat32x16", argLength: 2, commutative: true}, {name: "MulByPowOf2Float32x16", argLength: 2, commutative: false}, + {name: "MulByPowOf2MaskedFloat32x16", argLength: 3, commutative: false}, + {name: "MulMaskedFloat32x16", argLength: 3, 
commutative: true}, {name: "NotEqualFloat32x16", argLength: 2, commutative: true}, + {name: "NotEqualMaskedFloat32x16", argLength: 3, commutative: true}, {name: "SqrtFloat32x16", argLength: 1, commutative: false}, + {name: "SqrtMaskedFloat32x16", argLength: 2, commutative: false}, {name: "SubFloat32x16", argLength: 2, commutative: false}, + {name: "SubMaskedFloat32x16", argLength: 3, commutative: false}, {name: "AddFloat32x4", argLength: 2, commutative: true}, + {name: "AddMaskedFloat32x4", argLength: 3, commutative: true}, {name: "AddSubFloat32x4", argLength: 2, commutative: false}, {name: "ApproximateReciprocalFloat32x4", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalMaskedFloat32x4", argLength: 2, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat32x4", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalOfSqrtMaskedFloat32x4", argLength: 2, commutative: false}, {name: "CeilFloat32x4", argLength: 1, commutative: false}, {name: "DivFloat32x4", argLength: 2, commutative: false}, + {name: "DivMaskedFloat32x4", argLength: 3, commutative: false}, {name: "EqualFloat32x4", argLength: 2, commutative: true}, + {name: "EqualMaskedFloat32x4", argLength: 3, commutative: true}, {name: "FloorFloat32x4", argLength: 1, commutative: false}, {name: "FusedMultiplyAddFloat32x4", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddMaskedFloat32x4", argLength: 4, commutative: false}, {name: "FusedMultiplyAddSubFloat32x4", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddSubMaskedFloat32x4", argLength: 4, commutative: false}, {name: "FusedMultiplySubAddFloat32x4", argLength: 3, commutative: false}, + {name: "FusedMultiplySubAddMaskedFloat32x4", argLength: 4, commutative: false}, {name: "GreaterFloat32x4", argLength: 2, commutative: false}, {name: "GreaterEqualFloat32x4", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false}, + {name: "GreaterMaskedFloat32x4", argLength: 3, commutative: false}, {name: "IsNanFloat32x4", argLength: 2, commutative: true}, + {name: "IsNanMaskedFloat32x4", argLength: 3, commutative: true}, {name: "LessFloat32x4", argLength: 2, commutative: false}, {name: "LessEqualFloat32x4", argLength: 2, commutative: false}, - {name: "MaskedAddFloat32x4", argLength: 3, commutative: true}, - {name: "MaskedApproximateReciprocalFloat32x4", argLength: 2, commutative: false}, - {name: "MaskedApproximateReciprocalOfSqrtFloat32x4", argLength: 2, commutative: false}, - {name: "MaskedDivFloat32x4", argLength: 3, commutative: false}, - {name: "MaskedEqualFloat32x4", argLength: 3, commutative: true}, - {name: "MaskedFusedMultiplyAddFloat32x4", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplyAddSubFloat32x4", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplySubAddFloat32x4", argLength: 4, commutative: false}, - {name: "MaskedGreaterFloat32x4", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualFloat32x4", argLength: 3, commutative: false}, - {name: "MaskedIsNanFloat32x4", argLength: 3, commutative: true}, - {name: "MaskedLessFloat32x4", argLength: 3, commutative: false}, - {name: "MaskedLessEqualFloat32x4", argLength: 3, commutative: false}, - {name: "MaskedMaxFloat32x4", argLength: 3, commutative: true}, - {name: "MaskedMinFloat32x4", argLength: 3, commutative: true}, - {name: "MaskedMulFloat32x4", argLength: 3, commutative: true}, - {name: "MaskedMulByPowOf2Float32x4", argLength: 3, commutative: false}, - {name: "MaskedNotEqualFloat32x4", 
argLength: 3, commutative: true}, - {name: "MaskedSqrtFloat32x4", argLength: 2, commutative: false}, - {name: "MaskedSubFloat32x4", argLength: 3, commutative: false}, + {name: "LessEqualMaskedFloat32x4", argLength: 3, commutative: false}, + {name: "LessMaskedFloat32x4", argLength: 3, commutative: false}, {name: "MaxFloat32x4", argLength: 2, commutative: true}, + {name: "MaxMaskedFloat32x4", argLength: 3, commutative: true}, {name: "MinFloat32x4", argLength: 2, commutative: true}, + {name: "MinMaskedFloat32x4", argLength: 3, commutative: true}, {name: "MulFloat32x4", argLength: 2, commutative: true}, {name: "MulByPowOf2Float32x4", argLength: 2, commutative: false}, + {name: "MulByPowOf2MaskedFloat32x4", argLength: 3, commutative: false}, + {name: "MulMaskedFloat32x4", argLength: 3, commutative: true}, {name: "NotEqualFloat32x4", argLength: 2, commutative: true}, + {name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true}, {name: "PairwiseAddFloat32x4", argLength: 2, commutative: false}, {name: "PairwiseSubFloat32x4", argLength: 2, commutative: false}, {name: "RoundFloat32x4", argLength: 1, commutative: false}, {name: "SqrtFloat32x4", argLength: 1, commutative: false}, + {name: "SqrtMaskedFloat32x4", argLength: 2, commutative: false}, {name: "SubFloat32x4", argLength: 2, commutative: false}, + {name: "SubMaskedFloat32x4", argLength: 3, commutative: false}, {name: "TruncFloat32x4", argLength: 1, commutative: false}, {name: "AddFloat32x8", argLength: 2, commutative: true}, + {name: "AddMaskedFloat32x8", argLength: 3, commutative: true}, {name: "AddSubFloat32x8", argLength: 2, commutative: false}, {name: "ApproximateReciprocalFloat32x8", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalMaskedFloat32x8", argLength: 2, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat32x8", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalOfSqrtMaskedFloat32x8", argLength: 2, commutative: false}, {name: "CeilFloat32x8", argLength: 1, commutative: false}, {name: "DivFloat32x8", argLength: 2, commutative: false}, + {name: "DivMaskedFloat32x8", argLength: 3, commutative: false}, {name: "EqualFloat32x8", argLength: 2, commutative: true}, + {name: "EqualMaskedFloat32x8", argLength: 3, commutative: true}, {name: "FloorFloat32x8", argLength: 1, commutative: false}, {name: "FusedMultiplyAddFloat32x8", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddMaskedFloat32x8", argLength: 4, commutative: false}, {name: "FusedMultiplyAddSubFloat32x8", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddSubMaskedFloat32x8", argLength: 4, commutative: false}, {name: "FusedMultiplySubAddFloat32x8", argLength: 3, commutative: false}, + {name: "FusedMultiplySubAddMaskedFloat32x8", argLength: 4, commutative: false}, {name: "GreaterFloat32x8", argLength: 2, commutative: false}, {name: "GreaterEqualFloat32x8", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false}, + {name: "GreaterMaskedFloat32x8", argLength: 3, commutative: false}, {name: "IsNanFloat32x8", argLength: 2, commutative: true}, + {name: "IsNanMaskedFloat32x8", argLength: 3, commutative: true}, {name: "LessFloat32x8", argLength: 2, commutative: false}, {name: "LessEqualFloat32x8", argLength: 2, commutative: false}, - {name: "MaskedAddFloat32x8", argLength: 3, commutative: true}, - {name: "MaskedApproximateReciprocalFloat32x8", argLength: 2, commutative: false}, - {name: "MaskedApproximateReciprocalOfSqrtFloat32x8", argLength: 2, commutative: false}, 
- {name: "MaskedDivFloat32x8", argLength: 3, commutative: false}, - {name: "MaskedEqualFloat32x8", argLength: 3, commutative: true}, - {name: "MaskedFusedMultiplyAddFloat32x8", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplyAddSubFloat32x8", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplySubAddFloat32x8", argLength: 4, commutative: false}, - {name: "MaskedGreaterFloat32x8", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualFloat32x8", argLength: 3, commutative: false}, - {name: "MaskedIsNanFloat32x8", argLength: 3, commutative: true}, - {name: "MaskedLessFloat32x8", argLength: 3, commutative: false}, - {name: "MaskedLessEqualFloat32x8", argLength: 3, commutative: false}, - {name: "MaskedMaxFloat32x8", argLength: 3, commutative: true}, - {name: "MaskedMinFloat32x8", argLength: 3, commutative: true}, - {name: "MaskedMulFloat32x8", argLength: 3, commutative: true}, - {name: "MaskedMulByPowOf2Float32x8", argLength: 3, commutative: false}, - {name: "MaskedNotEqualFloat32x8", argLength: 3, commutative: true}, - {name: "MaskedSqrtFloat32x8", argLength: 2, commutative: false}, - {name: "MaskedSubFloat32x8", argLength: 3, commutative: false}, + {name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false}, + {name: "LessMaskedFloat32x8", argLength: 3, commutative: false}, {name: "MaxFloat32x8", argLength: 2, commutative: true}, + {name: "MaxMaskedFloat32x8", argLength: 3, commutative: true}, {name: "MinFloat32x8", argLength: 2, commutative: true}, + {name: "MinMaskedFloat32x8", argLength: 3, commutative: true}, {name: "MulFloat32x8", argLength: 2, commutative: true}, {name: "MulByPowOf2Float32x8", argLength: 2, commutative: false}, + {name: "MulByPowOf2MaskedFloat32x8", argLength: 3, commutative: false}, + {name: "MulMaskedFloat32x8", argLength: 3, commutative: true}, {name: "NotEqualFloat32x8", argLength: 2, commutative: true}, + {name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true}, {name: "PairwiseAddFloat32x8", argLength: 2, commutative: false}, {name: "PairwiseSubFloat32x8", argLength: 2, commutative: false}, {name: "RoundFloat32x8", argLength: 1, commutative: false}, {name: "SqrtFloat32x8", argLength: 1, commutative: false}, + {name: "SqrtMaskedFloat32x8", argLength: 2, commutative: false}, {name: "SubFloat32x8", argLength: 2, commutative: false}, + {name: "SubMaskedFloat32x8", argLength: 3, commutative: false}, {name: "TruncFloat32x8", argLength: 1, commutative: false}, {name: "AddFloat64x2", argLength: 2, commutative: true}, + {name: "AddMaskedFloat64x2", argLength: 3, commutative: true}, {name: "AddSubFloat64x2", argLength: 2, commutative: false}, {name: "ApproximateReciprocalFloat64x2", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalMaskedFloat64x2", argLength: 2, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalOfSqrtMaskedFloat64x2", argLength: 2, commutative: false}, {name: "CeilFloat64x2", argLength: 1, commutative: false}, {name: "DivFloat64x2", argLength: 2, commutative: false}, + {name: "DivMaskedFloat64x2", argLength: 3, commutative: false}, {name: "DotProdBroadcastFloat64x2", argLength: 2, commutative: true}, {name: "EqualFloat64x2", argLength: 2, commutative: true}, + {name: "EqualMaskedFloat64x2", argLength: 3, commutative: true}, {name: "FloorFloat64x2", argLength: 1, commutative: false}, {name: "FusedMultiplyAddFloat64x2", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddMaskedFloat64x2", 
argLength: 4, commutative: false}, {name: "FusedMultiplyAddSubFloat64x2", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddSubMaskedFloat64x2", argLength: 4, commutative: false}, {name: "FusedMultiplySubAddFloat64x2", argLength: 3, commutative: false}, + {name: "FusedMultiplySubAddMaskedFloat64x2", argLength: 4, commutative: false}, {name: "GreaterFloat64x2", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x2", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedFloat64x2", argLength: 3, commutative: false}, + {name: "GreaterMaskedFloat64x2", argLength: 3, commutative: false}, {name: "IsNanFloat64x2", argLength: 2, commutative: true}, + {name: "IsNanMaskedFloat64x2", argLength: 3, commutative: true}, {name: "LessFloat64x2", argLength: 2, commutative: false}, {name: "LessEqualFloat64x2", argLength: 2, commutative: false}, - {name: "MaskedAddFloat64x2", argLength: 3, commutative: true}, - {name: "MaskedApproximateReciprocalFloat64x2", argLength: 2, commutative: false}, - {name: "MaskedApproximateReciprocalOfSqrtFloat64x2", argLength: 2, commutative: false}, - {name: "MaskedDivFloat64x2", argLength: 3, commutative: false}, - {name: "MaskedEqualFloat64x2", argLength: 3, commutative: true}, - {name: "MaskedFusedMultiplyAddFloat64x2", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplyAddSubFloat64x2", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplySubAddFloat64x2", argLength: 4, commutative: false}, - {name: "MaskedGreaterFloat64x2", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualFloat64x2", argLength: 3, commutative: false}, - {name: "MaskedIsNanFloat64x2", argLength: 3, commutative: true}, - {name: "MaskedLessFloat64x2", argLength: 3, commutative: false}, - {name: "MaskedLessEqualFloat64x2", argLength: 3, commutative: false}, - {name: "MaskedMaxFloat64x2", argLength: 3, commutative: true}, - {name: "MaskedMinFloat64x2", argLength: 3, commutative: true}, - {name: "MaskedMulFloat64x2", argLength: 3, commutative: true}, - {name: "MaskedMulByPowOf2Float64x2", argLength: 3, commutative: false}, - {name: "MaskedNotEqualFloat64x2", argLength: 3, commutative: true}, - {name: "MaskedSqrtFloat64x2", argLength: 2, commutative: false}, - {name: "MaskedSubFloat64x2", argLength: 3, commutative: false}, + {name: "LessEqualMaskedFloat64x2", argLength: 3, commutative: false}, + {name: "LessMaskedFloat64x2", argLength: 3, commutative: false}, {name: "MaxFloat64x2", argLength: 2, commutative: true}, + {name: "MaxMaskedFloat64x2", argLength: 3, commutative: true}, {name: "MinFloat64x2", argLength: 2, commutative: true}, + {name: "MinMaskedFloat64x2", argLength: 3, commutative: true}, {name: "MulFloat64x2", argLength: 2, commutative: true}, {name: "MulByPowOf2Float64x2", argLength: 2, commutative: false}, + {name: "MulByPowOf2MaskedFloat64x2", argLength: 3, commutative: false}, + {name: "MulMaskedFloat64x2", argLength: 3, commutative: true}, {name: "NotEqualFloat64x2", argLength: 2, commutative: true}, + {name: "NotEqualMaskedFloat64x2", argLength: 3, commutative: true}, {name: "PairwiseAddFloat64x2", argLength: 2, commutative: false}, {name: "PairwiseSubFloat64x2", argLength: 2, commutative: false}, {name: "RoundFloat64x2", argLength: 1, commutative: false}, {name: "SqrtFloat64x2", argLength: 1, commutative: false}, + {name: "SqrtMaskedFloat64x2", argLength: 2, commutative: false}, {name: "SubFloat64x2", argLength: 2, commutative: false}, + {name: "SubMaskedFloat64x2", argLength: 3, commutative: false}, {name: "TruncFloat64x2", 
argLength: 1, commutative: false}, {name: "AddFloat64x4", argLength: 2, commutative: true}, + {name: "AddMaskedFloat64x4", argLength: 3, commutative: true}, {name: "AddSubFloat64x4", argLength: 2, commutative: false}, {name: "ApproximateReciprocalFloat64x4", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalMaskedFloat64x4", argLength: 2, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat64x4", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalOfSqrtMaskedFloat64x4", argLength: 2, commutative: false}, {name: "CeilFloat64x4", argLength: 1, commutative: false}, {name: "DivFloat64x4", argLength: 2, commutative: false}, + {name: "DivMaskedFloat64x4", argLength: 3, commutative: false}, {name: "EqualFloat64x4", argLength: 2, commutative: true}, + {name: "EqualMaskedFloat64x4", argLength: 3, commutative: true}, {name: "FloorFloat64x4", argLength: 1, commutative: false}, {name: "FusedMultiplyAddFloat64x4", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddMaskedFloat64x4", argLength: 4, commutative: false}, {name: "FusedMultiplyAddSubFloat64x4", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddSubMaskedFloat64x4", argLength: 4, commutative: false}, {name: "FusedMultiplySubAddFloat64x4", argLength: 3, commutative: false}, + {name: "FusedMultiplySubAddMaskedFloat64x4", argLength: 4, commutative: false}, {name: "GreaterFloat64x4", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x4", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedFloat64x4", argLength: 3, commutative: false}, + {name: "GreaterMaskedFloat64x4", argLength: 3, commutative: false}, {name: "IsNanFloat64x4", argLength: 2, commutative: true}, + {name: "IsNanMaskedFloat64x4", argLength: 3, commutative: true}, {name: "LessFloat64x4", argLength: 2, commutative: false}, {name: "LessEqualFloat64x4", argLength: 2, commutative: false}, - {name: "MaskedAddFloat64x4", argLength: 3, commutative: true}, - {name: "MaskedApproximateReciprocalFloat64x4", argLength: 2, commutative: false}, - {name: "MaskedApproximateReciprocalOfSqrtFloat64x4", argLength: 2, commutative: false}, - {name: "MaskedDivFloat64x4", argLength: 3, commutative: false}, - {name: "MaskedEqualFloat64x4", argLength: 3, commutative: true}, - {name: "MaskedFusedMultiplyAddFloat64x4", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplyAddSubFloat64x4", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplySubAddFloat64x4", argLength: 4, commutative: false}, - {name: "MaskedGreaterFloat64x4", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualFloat64x4", argLength: 3, commutative: false}, - {name: "MaskedIsNanFloat64x4", argLength: 3, commutative: true}, - {name: "MaskedLessFloat64x4", argLength: 3, commutative: false}, - {name: "MaskedLessEqualFloat64x4", argLength: 3, commutative: false}, - {name: "MaskedMaxFloat64x4", argLength: 3, commutative: true}, - {name: "MaskedMinFloat64x4", argLength: 3, commutative: true}, - {name: "MaskedMulFloat64x4", argLength: 3, commutative: true}, - {name: "MaskedMulByPowOf2Float64x4", argLength: 3, commutative: false}, - {name: "MaskedNotEqualFloat64x4", argLength: 3, commutative: true}, - {name: "MaskedSqrtFloat64x4", argLength: 2, commutative: false}, - {name: "MaskedSubFloat64x4", argLength: 3, commutative: false}, + {name: "LessEqualMaskedFloat64x4", argLength: 3, commutative: false}, + {name: "LessMaskedFloat64x4", argLength: 3, commutative: false}, {name: "MaxFloat64x4", argLength: 2, commutative: true}, + {name: 
"MaxMaskedFloat64x4", argLength: 3, commutative: true}, {name: "MinFloat64x4", argLength: 2, commutative: true}, + {name: "MinMaskedFloat64x4", argLength: 3, commutative: true}, {name: "MulFloat64x4", argLength: 2, commutative: true}, {name: "MulByPowOf2Float64x4", argLength: 2, commutative: false}, + {name: "MulByPowOf2MaskedFloat64x4", argLength: 3, commutative: false}, + {name: "MulMaskedFloat64x4", argLength: 3, commutative: true}, {name: "NotEqualFloat64x4", argLength: 2, commutative: true}, + {name: "NotEqualMaskedFloat64x4", argLength: 3, commutative: true}, {name: "PairwiseAddFloat64x4", argLength: 2, commutative: false}, {name: "PairwiseSubFloat64x4", argLength: 2, commutative: false}, {name: "RoundFloat64x4", argLength: 1, commutative: false}, {name: "SqrtFloat64x4", argLength: 1, commutative: false}, + {name: "SqrtMaskedFloat64x4", argLength: 2, commutative: false}, {name: "SubFloat64x4", argLength: 2, commutative: false}, + {name: "SubMaskedFloat64x4", argLength: 3, commutative: false}, {name: "TruncFloat64x4", argLength: 1, commutative: false}, {name: "AddFloat64x8", argLength: 2, commutative: true}, + {name: "AddMaskedFloat64x8", argLength: 3, commutative: true}, {name: "ApproximateReciprocalFloat64x8", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalMaskedFloat64x8", argLength: 2, commutative: false}, {name: "ApproximateReciprocalOfSqrtFloat64x8", argLength: 1, commutative: false}, + {name: "ApproximateReciprocalOfSqrtMaskedFloat64x8", argLength: 2, commutative: false}, {name: "DivFloat64x8", argLength: 2, commutative: false}, + {name: "DivMaskedFloat64x8", argLength: 3, commutative: false}, {name: "EqualFloat64x8", argLength: 2, commutative: true}, + {name: "EqualMaskedFloat64x8", argLength: 3, commutative: true}, {name: "FusedMultiplyAddFloat64x8", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddMaskedFloat64x8", argLength: 4, commutative: false}, {name: "FusedMultiplyAddSubFloat64x8", argLength: 3, commutative: false}, + {name: "FusedMultiplyAddSubMaskedFloat64x8", argLength: 4, commutative: false}, {name: "FusedMultiplySubAddFloat64x8", argLength: 3, commutative: false}, + {name: "FusedMultiplySubAddMaskedFloat64x8", argLength: 4, commutative: false}, {name: "GreaterFloat64x8", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x8", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedFloat64x8", argLength: 3, commutative: false}, + {name: "GreaterMaskedFloat64x8", argLength: 3, commutative: false}, {name: "IsNanFloat64x8", argLength: 2, commutative: true}, + {name: "IsNanMaskedFloat64x8", argLength: 3, commutative: true}, {name: "LessFloat64x8", argLength: 2, commutative: false}, {name: "LessEqualFloat64x8", argLength: 2, commutative: false}, - {name: "MaskedAddFloat64x8", argLength: 3, commutative: true}, - {name: "MaskedApproximateReciprocalFloat64x8", argLength: 2, commutative: false}, - {name: "MaskedApproximateReciprocalOfSqrtFloat64x8", argLength: 2, commutative: false}, - {name: "MaskedDivFloat64x8", argLength: 3, commutative: false}, - {name: "MaskedEqualFloat64x8", argLength: 3, commutative: true}, - {name: "MaskedFusedMultiplyAddFloat64x8", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplyAddSubFloat64x8", argLength: 4, commutative: false}, - {name: "MaskedFusedMultiplySubAddFloat64x8", argLength: 4, commutative: false}, - {name: "MaskedGreaterFloat64x8", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualFloat64x8", argLength: 3, commutative: false}, - {name: 
"MaskedIsNanFloat64x8", argLength: 3, commutative: true}, - {name: "MaskedLessFloat64x8", argLength: 3, commutative: false}, - {name: "MaskedLessEqualFloat64x8", argLength: 3, commutative: false}, - {name: "MaskedMaxFloat64x8", argLength: 3, commutative: true}, - {name: "MaskedMinFloat64x8", argLength: 3, commutative: true}, - {name: "MaskedMulFloat64x8", argLength: 3, commutative: true}, - {name: "MaskedMulByPowOf2Float64x8", argLength: 3, commutative: false}, - {name: "MaskedNotEqualFloat64x8", argLength: 3, commutative: true}, - {name: "MaskedSqrtFloat64x8", argLength: 2, commutative: false}, - {name: "MaskedSubFloat64x8", argLength: 3, commutative: false}, + {name: "LessEqualMaskedFloat64x8", argLength: 3, commutative: false}, + {name: "LessMaskedFloat64x8", argLength: 3, commutative: false}, {name: "MaxFloat64x8", argLength: 2, commutative: true}, + {name: "MaxMaskedFloat64x8", argLength: 3, commutative: true}, {name: "MinFloat64x8", argLength: 2, commutative: true}, + {name: "MinMaskedFloat64x8", argLength: 3, commutative: true}, {name: "MulFloat64x8", argLength: 2, commutative: true}, {name: "MulByPowOf2Float64x8", argLength: 2, commutative: false}, + {name: "MulByPowOf2MaskedFloat64x8", argLength: 3, commutative: false}, + {name: "MulMaskedFloat64x8", argLength: 3, commutative: true}, {name: "NotEqualFloat64x8", argLength: 2, commutative: true}, + {name: "NotEqualMaskedFloat64x8", argLength: 3, commutative: true}, {name: "SqrtFloat64x8", argLength: 1, commutative: false}, + {name: "SqrtMaskedFloat64x8", argLength: 2, commutative: false}, {name: "SubFloat64x8", argLength: 2, commutative: false}, + {name: "SubMaskedFloat64x8", argLength: 3, commutative: false}, {name: "AbsoluteInt16x16", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt16x16", argLength: 2, commutative: false}, {name: "AddInt16x16", argLength: 2, commutative: true}, + {name: "AddMaskedInt16x16", argLength: 3, commutative: true}, {name: "AndInt16x16", argLength: 2, commutative: true}, {name: "AndNotInt16x16", argLength: 2, commutative: false}, {name: "EqualInt16x16", argLength: 2, commutative: true}, + {name: "EqualMaskedInt16x16", argLength: 3, commutative: true}, {name: "GreaterInt16x16", argLength: 2, commutative: false}, {name: "GreaterEqualInt16x16", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt16x16", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt16x16", argLength: 3, commutative: false}, {name: "LessInt16x16", argLength: 2, commutative: false}, {name: "LessEqualInt16x16", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt16x16", argLength: 2, commutative: false}, - {name: "MaskedAddInt16x16", argLength: 3, commutative: true}, - {name: "MaskedEqualInt16x16", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt16x16", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt16x16", argLength: 3, commutative: false}, - {name: "MaskedLessInt16x16", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt16x16", argLength: 3, commutative: false}, - {name: "MaskedMaxInt16x16", argLength: 3, commutative: true}, - {name: "MaskedMinInt16x16", argLength: 3, commutative: true}, - {name: "MaskedMulHighInt16x16", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt16x16", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt16x16", argLength: 3, commutative: true}, - {name: "MaskedPairDotProdInt16x16", argLength: 3, commutative: false}, - {name: "MaskedPopCountInt16x16", argLength: 2, commutative: false}, - {name: 
"MaskedSaturatedAddInt16x16", argLength: 3, commutative: true}, - {name: "MaskedSaturatedSubInt16x16", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftInt16x16", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt16x16", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt16x16", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt16x16", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt16x16", argLength: 3, commutative: false}, - {name: "MaskedSubInt16x16", argLength: 3, commutative: false}, + {name: "LessEqualMaskedInt16x16", argLength: 3, commutative: false}, + {name: "LessMaskedInt16x16", argLength: 3, commutative: false}, {name: "MaxInt16x16", argLength: 2, commutative: true}, + {name: "MaxMaskedInt16x16", argLength: 3, commutative: true}, {name: "MinInt16x16", argLength: 2, commutative: true}, + {name: "MinMaskedInt16x16", argLength: 3, commutative: true}, {name: "MulHighInt16x16", argLength: 2, commutative: true}, + {name: "MulHighMaskedInt16x16", argLength: 3, commutative: true}, {name: "MulLowInt16x16", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt16x16", argLength: 3, commutative: true}, {name: "NotEqualInt16x16", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt16x16", argLength: 3, commutative: true}, {name: "OrInt16x16", argLength: 2, commutative: true}, {name: "PairDotProdInt16x16", argLength: 2, commutative: false}, + {name: "PairDotProdMaskedInt16x16", argLength: 3, commutative: false}, {name: "PairwiseAddInt16x16", argLength: 2, commutative: false}, {name: "PairwiseSubInt16x16", argLength: 2, commutative: false}, {name: "PopCountInt16x16", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt16x16", argLength: 2, commutative: false}, {name: "SaturatedAddInt16x16", argLength: 2, commutative: true}, + {name: "SaturatedAddMaskedInt16x16", argLength: 3, commutative: true}, {name: "SaturatedPairwiseAddInt16x16", argLength: 2, commutative: false}, {name: "SaturatedPairwiseSubInt16x16", argLength: 2, commutative: false}, {name: "SaturatedSubInt16x16", argLength: 2, commutative: false}, + {name: "SaturatedSubMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false}, {name: "ShiftAllRightInt16x16", argLength: 2, commutative: false}, {name: "ShiftAllRightSignExtendedInt16x16", argLength: 2, commutative: false}, {name: "ShiftLeftInt16x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftRightInt16x16", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt16x16", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt16x16", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt16x16", argLength: 3, commutative: false}, {name: "SignInt16x16", argLength: 2, commutative: false}, {name: "SubInt16x16", argLength: 2, commutative: false}, + {name: "SubMaskedInt16x16", argLength: 3, commutative: false}, {name: "XorInt16x16", argLength: 2, commutative: true}, {name: "AbsoluteInt16x32", argLength: 1, commutative: false}, + {name: 
"AbsoluteMaskedInt16x32", argLength: 2, commutative: false}, {name: "AddInt16x32", argLength: 2, commutative: true}, + {name: "AddMaskedInt16x32", argLength: 3, commutative: true}, {name: "EqualInt16x32", argLength: 2, commutative: true}, + {name: "EqualMaskedInt16x32", argLength: 3, commutative: true}, {name: "GreaterInt16x32", argLength: 2, commutative: false}, {name: "GreaterEqualInt16x32", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt16x32", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt16x32", argLength: 3, commutative: false}, {name: "LessInt16x32", argLength: 2, commutative: false}, {name: "LessEqualInt16x32", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt16x32", argLength: 2, commutative: false}, - {name: "MaskedAddInt16x32", argLength: 3, commutative: true}, - {name: "MaskedEqualInt16x32", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt16x32", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt16x32", argLength: 3, commutative: false}, - {name: "MaskedLessInt16x32", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt16x32", argLength: 3, commutative: false}, - {name: "MaskedMaxInt16x32", argLength: 3, commutative: true}, - {name: "MaskedMinInt16x32", argLength: 3, commutative: true}, - {name: "MaskedMulHighInt16x32", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt16x32", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt16x32", argLength: 3, commutative: true}, - {name: "MaskedPairDotProdInt16x32", argLength: 3, commutative: false}, - {name: "MaskedPopCountInt16x32", argLength: 2, commutative: false}, - {name: "MaskedSaturatedAddInt16x32", argLength: 3, commutative: true}, - {name: "MaskedSaturatedSubInt16x32", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftInt16x32", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt16x32", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt16x32", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt16x32", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt16x32", argLength: 3, commutative: false}, - {name: "MaskedSubInt16x32", argLength: 3, commutative: false}, + {name: "LessEqualMaskedInt16x32", argLength: 3, commutative: false}, + {name: "LessMaskedInt16x32", argLength: 3, commutative: false}, {name: "MaxInt16x32", argLength: 2, commutative: true}, + {name: "MaxMaskedInt16x32", argLength: 3, commutative: true}, {name: "MinInt16x32", argLength: 2, commutative: true}, + {name: "MinMaskedInt16x32", argLength: 3, commutative: true}, {name: "MulHighInt16x32", argLength: 2, commutative: true}, + {name: "MulHighMaskedInt16x32", argLength: 3, commutative: true}, {name: "MulLowInt16x32", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt16x32", argLength: 3, commutative: true}, {name: "NotEqualInt16x32", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt16x32", argLength: 3, commutative: true}, {name: "PairDotProdInt16x32", argLength: 2, commutative: false}, + {name: "PairDotProdMaskedInt16x32", argLength: 3, commutative: false}, {name: "PopCountInt16x32", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt16x32", argLength: 2, commutative: false}, {name: "SaturatedAddInt16x32", argLength: 2, commutative: true}, + {name: "SaturatedAddMaskedInt16x32", argLength: 3, commutative: true}, {name: "SaturatedSubInt16x32", argLength: 2, commutative: false}, + {name: "SaturatedSubMaskedInt16x32", argLength: 3, 
commutative: false}, {name: "ShiftLeftInt16x32", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt16x32", argLength: 3, commutative: false}, {name: "ShiftRightInt16x32", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt16x32", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedInt16x32", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt16x32", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt16x32", argLength: 3, commutative: false}, {name: "SubInt16x32", argLength: 2, commutative: false}, + {name: "SubMaskedInt16x32", argLength: 3, commutative: false}, {name: "AbsoluteInt16x8", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt16x8", argLength: 2, commutative: false}, {name: "AddInt16x8", argLength: 2, commutative: true}, + {name: "AddMaskedInt16x8", argLength: 3, commutative: true}, {name: "AndInt16x8", argLength: 2, commutative: true}, {name: "AndNotInt16x8", argLength: 2, commutative: false}, {name: "EqualInt16x8", argLength: 2, commutative: true}, + {name: "EqualMaskedInt16x8", argLength: 3, commutative: true}, {name: "GreaterInt16x8", argLength: 2, commutative: false}, {name: "GreaterEqualInt16x8", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt16x8", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt16x8", argLength: 3, commutative: false}, {name: "LessInt16x8", argLength: 2, commutative: false}, {name: "LessEqualInt16x8", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt16x8", argLength: 2, commutative: false}, - {name: "MaskedAddInt16x8", argLength: 3, commutative: true}, - {name: "MaskedEqualInt16x8", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt16x8", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt16x8", argLength: 3, commutative: false}, - {name: "MaskedLessInt16x8", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt16x8", argLength: 3, commutative: false}, - {name: "MaskedMaxInt16x8", argLength: 3, commutative: true}, - {name: "MaskedMinInt16x8", argLength: 3, commutative: true}, - {name: "MaskedMulHighInt16x8", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt16x8", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt16x8", argLength: 3, commutative: true}, - {name: "MaskedPairDotProdInt16x8", argLength: 3, commutative: false}, - {name: "MaskedPopCountInt16x8", argLength: 2, commutative: false}, - {name: "MaskedSaturatedAddInt16x8", argLength: 3, commutative: true}, - {name: "MaskedSaturatedSubInt16x8", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftInt16x8", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt16x8", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt16x8", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt16x8", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt16x8", argLength: 3, commutative: false}, - {name: "MaskedSubInt16x8", argLength: 3, commutative: false}, + {name: "LessEqualMaskedInt16x8", argLength: 3, commutative: false}, + {name: "LessMaskedInt16x8", argLength: 3, commutative: false}, {name: "MaxInt16x8", argLength: 2, commutative: true}, + {name: "MaxMaskedInt16x8", argLength: 
3, commutative: true}, {name: "MinInt16x8", argLength: 2, commutative: true}, + {name: "MinMaskedInt16x8", argLength: 3, commutative: true}, {name: "MulHighInt16x8", argLength: 2, commutative: true}, + {name: "MulHighMaskedInt16x8", argLength: 3, commutative: true}, {name: "MulLowInt16x8", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt16x8", argLength: 3, commutative: true}, {name: "NotEqualInt16x8", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt16x8", argLength: 3, commutative: true}, {name: "OrInt16x8", argLength: 2, commutative: true}, {name: "PairDotProdInt16x8", argLength: 2, commutative: false}, + {name: "PairDotProdMaskedInt16x8", argLength: 3, commutative: false}, {name: "PairwiseAddInt16x8", argLength: 2, commutative: false}, {name: "PairwiseSubInt16x8", argLength: 2, commutative: false}, {name: "PopCountInt16x8", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt16x8", argLength: 2, commutative: false}, {name: "SaturatedAddInt16x8", argLength: 2, commutative: true}, + {name: "SaturatedAddMaskedInt16x8", argLength: 3, commutative: true}, {name: "SaturatedPairwiseAddInt16x8", argLength: 2, commutative: false}, {name: "SaturatedPairwiseSubInt16x8", argLength: 2, commutative: false}, {name: "SaturatedSubInt16x8", argLength: 2, commutative: false}, + {name: "SaturatedSubMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false}, {name: "ShiftAllRightInt16x8", argLength: 2, commutative: false}, {name: "ShiftAllRightSignExtendedInt16x8", argLength: 2, commutative: false}, {name: "ShiftLeftInt16x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftRightInt16x8", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt16x8", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt16x8", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt16x8", argLength: 3, commutative: false}, {name: "SignInt16x8", argLength: 2, commutative: false}, {name: "SubInt16x8", argLength: 2, commutative: false}, + {name: "SubMaskedInt16x8", argLength: 3, commutative: false}, {name: "XorInt16x8", argLength: 2, commutative: true}, {name: "AbsoluteInt32x16", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt32x16", argLength: 2, commutative: false}, {name: "AddInt32x16", argLength: 2, commutative: true}, + {name: "AddMaskedInt32x16", argLength: 3, commutative: true}, {name: "AndInt32x16", argLength: 2, commutative: true}, + {name: "AndMaskedInt32x16", argLength: 3, commutative: true}, {name: "AndNotInt32x16", argLength: 2, commutative: false}, + {name: "AndNotMaskedInt32x16", argLength: 3, commutative: false}, {name: "EqualInt32x16", argLength: 2, commutative: true}, + {name: "EqualMaskedInt32x16", argLength: 3, commutative: true}, {name: "GreaterInt32x16", argLength: 2, commutative: false}, {name: "GreaterEqualInt32x16", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt32x16", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt32x16", argLength: 3, commutative: false}, {name: "LessInt32x16", argLength: 2, commutative: false}, {name: 
"LessEqualInt32x16", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt32x16", argLength: 2, commutative: false}, - {name: "MaskedAddInt32x16", argLength: 3, commutative: true}, - {name: "MaskedAndInt32x16", argLength: 3, commutative: true}, - {name: "MaskedAndNotInt32x16", argLength: 3, commutative: false}, - {name: "MaskedEqualInt32x16", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt32x16", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt32x16", argLength: 3, commutative: false}, - {name: "MaskedLessInt32x16", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt32x16", argLength: 3, commutative: false}, - {name: "MaskedMaxInt32x16", argLength: 3, commutative: true}, - {name: "MaskedMinInt32x16", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt32x16", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt32x16", argLength: 3, commutative: true}, - {name: "MaskedOrInt32x16", argLength: 3, commutative: true}, - {name: "MaskedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false}, - {name: "MaskedPopCountInt32x16", argLength: 2, commutative: false}, - {name: "MaskedRotateLeftInt32x16", argLength: 3, commutative: false}, - {name: "MaskedRotateRightInt32x16", argLength: 3, commutative: false}, - {name: "MaskedSaturatedPairDotProdAccumulateInt32x16", argLength: 4, commutative: false}, - {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false}, - {name: "MaskedShiftLeftInt32x16", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt32x16", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt32x16", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt32x16", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt32x16", argLength: 3, commutative: false}, - {name: "MaskedSubInt32x16", argLength: 3, commutative: false}, - {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 4, commutative: false}, - {name: "MaskedXorInt32x16", argLength: 3, commutative: true}, + {name: "LessEqualMaskedInt32x16", argLength: 3, commutative: false}, + {name: "LessMaskedInt32x16", argLength: 3, commutative: false}, {name: "MaxInt32x16", argLength: 2, commutative: true}, + {name: "MaxMaskedInt32x16", argLength: 3, commutative: true}, {name: "MinInt32x16", argLength: 2, commutative: true}, + {name: "MinMaskedInt32x16", argLength: 3, commutative: true}, {name: "MulLowInt32x16", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt32x16", argLength: 3, commutative: true}, {name: "NotEqualInt32x16", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt32x16", argLength: 3, commutative: true}, {name: "OrInt32x16", argLength: 2, commutative: true}, + {name: "OrMaskedInt32x16", argLength: 3, commutative: true}, {name: "PairDotProdAccumulateInt32x16", argLength: 3, commutative: false}, + {name: "PairDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, {name: "PopCountInt32x16", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt32x16", argLength: 2, commutative: false}, {name: "RotateLeftInt32x16", argLength: 2, commutative: false}, + {name: "RotateLeftMaskedInt32x16", argLength: 3, commutative: false}, {name: "RotateRightInt32x16", argLength: 2, commutative: false}, + {name: "RotateRightMaskedInt32x16", argLength: 3, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x16", argLength: 3, commutative: false}, + {name: 
"SaturatedPairDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, + {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, {name: "ShiftLeftInt32x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt32x16", argLength: 3, commutative: false}, {name: "ShiftRightInt32x16", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt32x16", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedInt32x16", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt32x16", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt32x16", argLength: 3, commutative: false}, {name: "SubInt32x16", argLength: 2, commutative: false}, + {name: "SubMaskedInt32x16", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, + {name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, {name: "XorInt32x16", argLength: 2, commutative: true}, + {name: "XorMaskedInt32x16", argLength: 3, commutative: true}, {name: "AbsoluteInt32x4", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt32x4", argLength: 2, commutative: false}, {name: "AddInt32x4", argLength: 2, commutative: true}, + {name: "AddMaskedInt32x4", argLength: 3, commutative: true}, {name: "AndInt32x4", argLength: 2, commutative: true}, + {name: "AndMaskedInt32x4", argLength: 3, commutative: true}, {name: "AndNotInt32x4", argLength: 2, commutative: false}, + {name: "AndNotMaskedInt32x4", argLength: 3, commutative: false}, {name: "EqualInt32x4", argLength: 2, commutative: true}, + {name: "EqualMaskedInt32x4", argLength: 3, commutative: true}, {name: "GreaterInt32x4", argLength: 2, commutative: false}, {name: "GreaterEqualInt32x4", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt32x4", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt32x4", argLength: 3, commutative: false}, {name: "LessInt32x4", argLength: 2, commutative: false}, {name: "LessEqualInt32x4", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt32x4", argLength: 2, commutative: false}, - {name: "MaskedAddInt32x4", argLength: 3, commutative: true}, - {name: "MaskedAndInt32x4", argLength: 3, commutative: true}, - {name: "MaskedAndNotInt32x4", argLength: 3, commutative: false}, - {name: "MaskedEqualInt32x4", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt32x4", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt32x4", argLength: 3, commutative: false}, - {name: "MaskedLessInt32x4", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt32x4", argLength: 3, commutative: false}, - {name: "MaskedMaxInt32x4", argLength: 3, commutative: true}, - {name: "MaskedMinInt32x4", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt32x4", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt32x4", argLength: 3, commutative: true}, - {name: "MaskedOrInt32x4", argLength: 3, commutative: true}, - {name: "MaskedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false}, - {name: "MaskedPopCountInt32x4", argLength: 2, commutative: false}, 
- {name: "MaskedRotateLeftInt32x4", argLength: 3, commutative: false}, - {name: "MaskedRotateRightInt32x4", argLength: 3, commutative: false}, - {name: "MaskedSaturatedPairDotProdAccumulateInt32x4", argLength: 4, commutative: false}, - {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false}, - {name: "MaskedShiftLeftInt32x4", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt32x4", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt32x4", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt32x4", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt32x4", argLength: 3, commutative: false}, - {name: "MaskedSubInt32x4", argLength: 3, commutative: false}, - {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 4, commutative: false}, - {name: "MaskedXorInt32x4", argLength: 3, commutative: true}, + {name: "LessEqualMaskedInt32x4", argLength: 3, commutative: false}, + {name: "LessMaskedInt32x4", argLength: 3, commutative: false}, {name: "MaxInt32x4", argLength: 2, commutative: true}, + {name: "MaxMaskedInt32x4", argLength: 3, commutative: true}, {name: "MinInt32x4", argLength: 2, commutative: true}, + {name: "MinMaskedInt32x4", argLength: 3, commutative: true}, {name: "MulEvenWidenInt32x4", argLength: 2, commutative: true}, {name: "MulLowInt32x4", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt32x4", argLength: 3, commutative: true}, {name: "NotEqualInt32x4", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt32x4", argLength: 3, commutative: true}, {name: "OrInt32x4", argLength: 2, commutative: true}, + {name: "OrMaskedInt32x4", argLength: 3, commutative: true}, {name: "PairDotProdAccumulateInt32x4", argLength: 3, commutative: false}, + {name: "PairDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false}, {name: "PairwiseAddInt32x4", argLength: 2, commutative: false}, {name: "PairwiseSubInt32x4", argLength: 2, commutative: false}, {name: "PopCountInt32x4", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt32x4", argLength: 2, commutative: false}, {name: "RotateLeftInt32x4", argLength: 2, commutative: false}, + {name: "RotateLeftMaskedInt32x4", argLength: 3, commutative: false}, {name: "RotateRightInt32x4", argLength: 2, commutative: false}, + {name: "RotateRightMaskedInt32x4", argLength: 3, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x4", argLength: 3, commutative: false}, + {name: "SaturatedPairDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false}, + {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false}, {name: "ShiftAllLeftInt32x4", argLength: 2, commutative: false}, {name: "ShiftAllRightInt32x4", argLength: 2, commutative: false}, {name: "ShiftAllRightSignExtendedInt32x4", argLength: 2, commutative: false}, {name: "ShiftLeftInt32x4", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt32x4", argLength: 3, commutative: false}, {name: "ShiftRightInt32x4", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt32x4", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: 
false}, + {name: "ShiftRightMaskedInt32x4", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt32x4", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt32x4", argLength: 3, commutative: false}, {name: "SignInt32x4", argLength: 2, commutative: false}, {name: "SubInt32x4", argLength: 2, commutative: false}, + {name: "SubMaskedInt32x4", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false}, + {name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false}, {name: "XorInt32x4", argLength: 2, commutative: true}, + {name: "XorMaskedInt32x4", argLength: 3, commutative: true}, {name: "AbsoluteInt32x8", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt32x8", argLength: 2, commutative: false}, {name: "AddInt32x8", argLength: 2, commutative: true}, + {name: "AddMaskedInt32x8", argLength: 3, commutative: true}, {name: "AndInt32x8", argLength: 2, commutative: true}, + {name: "AndMaskedInt32x8", argLength: 3, commutative: true}, {name: "AndNotInt32x8", argLength: 2, commutative: false}, + {name: "AndNotMaskedInt32x8", argLength: 3, commutative: false}, {name: "EqualInt32x8", argLength: 2, commutative: true}, + {name: "EqualMaskedInt32x8", argLength: 3, commutative: true}, {name: "GreaterInt32x8", argLength: 2, commutative: false}, {name: "GreaterEqualInt32x8", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt32x8", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt32x8", argLength: 3, commutative: false}, {name: "LessInt32x8", argLength: 2, commutative: false}, {name: "LessEqualInt32x8", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt32x8", argLength: 2, commutative: false}, - {name: "MaskedAddInt32x8", argLength: 3, commutative: true}, - {name: "MaskedAndInt32x8", argLength: 3, commutative: true}, - {name: "MaskedAndNotInt32x8", argLength: 3, commutative: false}, - {name: "MaskedEqualInt32x8", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt32x8", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt32x8", argLength: 3, commutative: false}, - {name: "MaskedLessInt32x8", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt32x8", argLength: 3, commutative: false}, - {name: "MaskedMaxInt32x8", argLength: 3, commutative: true}, - {name: "MaskedMinInt32x8", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt32x8", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt32x8", argLength: 3, commutative: true}, - {name: "MaskedOrInt32x8", argLength: 3, commutative: true}, - {name: "MaskedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false}, - {name: "MaskedPopCountInt32x8", argLength: 2, commutative: false}, - {name: "MaskedRotateLeftInt32x8", argLength: 3, commutative: false}, - {name: "MaskedRotateRightInt32x8", argLength: 3, commutative: false}, - {name: "MaskedSaturatedPairDotProdAccumulateInt32x8", argLength: 4, commutative: false}, - {name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false}, - {name: "MaskedShiftLeftInt32x8", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt32x8", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt32x8", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt32x8", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt32x8", argLength: 3, commutative: false}, - {name: 
"MaskedSubInt32x8", argLength: 3, commutative: false}, - {name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 4, commutative: false}, - {name: "MaskedXorInt32x8", argLength: 3, commutative: true}, + {name: "LessEqualMaskedInt32x8", argLength: 3, commutative: false}, + {name: "LessMaskedInt32x8", argLength: 3, commutative: false}, {name: "MaxInt32x8", argLength: 2, commutative: true}, + {name: "MaxMaskedInt32x8", argLength: 3, commutative: true}, {name: "MinInt32x8", argLength: 2, commutative: true}, + {name: "MinMaskedInt32x8", argLength: 3, commutative: true}, {name: "MulEvenWidenInt32x8", argLength: 2, commutative: true}, {name: "MulLowInt32x8", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt32x8", argLength: 3, commutative: true}, {name: "NotEqualInt32x8", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt32x8", argLength: 3, commutative: true}, {name: "OrInt32x8", argLength: 2, commutative: true}, + {name: "OrMaskedInt32x8", argLength: 3, commutative: true}, {name: "PairDotProdAccumulateInt32x8", argLength: 3, commutative: false}, + {name: "PairDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false}, {name: "PairwiseAddInt32x8", argLength: 2, commutative: false}, {name: "PairwiseSubInt32x8", argLength: 2, commutative: false}, {name: "PopCountInt32x8", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt32x8", argLength: 2, commutative: false}, {name: "RotateLeftInt32x8", argLength: 2, commutative: false}, + {name: "RotateLeftMaskedInt32x8", argLength: 3, commutative: false}, {name: "RotateRightInt32x8", argLength: 2, commutative: false}, + {name: "RotateRightMaskedInt32x8", argLength: 3, commutative: false}, {name: "SaturatedPairDotProdAccumulateInt32x8", argLength: 3, commutative: false}, + {name: "SaturatedPairDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false}, + {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false}, {name: "ShiftAllLeftInt32x8", argLength: 2, commutative: false}, {name: "ShiftAllRightInt32x8", argLength: 2, commutative: false}, {name: "ShiftAllRightSignExtendedInt32x8", argLength: 2, commutative: false}, {name: "ShiftLeftInt32x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt32x8", argLength: 3, commutative: false}, {name: "ShiftRightInt32x8", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt32x8", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedInt32x8", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt32x8", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt32x8", argLength: 3, commutative: false}, {name: "SignInt32x8", argLength: 2, commutative: false}, {name: "SubInt32x8", argLength: 2, commutative: false}, + {name: "SubMaskedInt32x8", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false}, + {name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false}, {name: "XorInt32x8", argLength: 2, commutative: true}, + {name: "XorMaskedInt32x8", argLength: 3, commutative: true}, {name: "AbsoluteInt64x2", 
argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt64x2", argLength: 2, commutative: false}, {name: "AddInt64x2", argLength: 2, commutative: true}, + {name: "AddMaskedInt64x2", argLength: 3, commutative: true}, {name: "AndInt64x2", argLength: 2, commutative: true}, + {name: "AndMaskedInt64x2", argLength: 3, commutative: true}, {name: "AndNotInt64x2", argLength: 2, commutative: false}, + {name: "AndNotMaskedInt64x2", argLength: 3, commutative: false}, {name: "EqualInt64x2", argLength: 2, commutative: true}, + {name: "EqualMaskedInt64x2", argLength: 3, commutative: true}, {name: "GreaterInt64x2", argLength: 2, commutative: false}, {name: "GreaterEqualInt64x2", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt64x2", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt64x2", argLength: 3, commutative: false}, {name: "LessInt64x2", argLength: 2, commutative: false}, {name: "LessEqualInt64x2", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt64x2", argLength: 2, commutative: false}, - {name: "MaskedAddInt64x2", argLength: 3, commutative: true}, - {name: "MaskedAndInt64x2", argLength: 3, commutative: true}, - {name: "MaskedAndNotInt64x2", argLength: 3, commutative: false}, - {name: "MaskedEqualInt64x2", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt64x2", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt64x2", argLength: 3, commutative: false}, - {name: "MaskedLessInt64x2", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt64x2", argLength: 3, commutative: false}, - {name: "MaskedMaxInt64x2", argLength: 3, commutative: true}, - {name: "MaskedMinInt64x2", argLength: 3, commutative: true}, - {name: "MaskedMulEvenWidenInt64x2", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt64x2", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt64x2", argLength: 3, commutative: true}, - {name: "MaskedOrInt64x2", argLength: 3, commutative: true}, - {name: "MaskedPopCountInt64x2", argLength: 2, commutative: false}, - {name: "MaskedRotateLeftInt64x2", argLength: 3, commutative: false}, - {name: "MaskedRotateRightInt64x2", argLength: 3, commutative: false}, - {name: "MaskedShiftAllLeftInt64x2", argLength: 3, commutative: false}, - {name: "MaskedShiftAllRightInt64x2", argLength: 3, commutative: false}, - {name: "MaskedShiftAllRightSignExtendedInt64x2", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftInt64x2", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt64x2", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt64x2", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt64x2", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt64x2", argLength: 3, commutative: false}, - {name: "MaskedSubInt64x2", argLength: 3, commutative: false}, - {name: "MaskedXorInt64x2", argLength: 3, commutative: true}, + {name: "LessEqualMaskedInt64x2", argLength: 3, commutative: false}, + {name: "LessMaskedInt64x2", argLength: 3, commutative: false}, {name: "MaxInt64x2", argLength: 2, commutative: true}, + {name: "MaxMaskedInt64x2", argLength: 3, commutative: true}, {name: "MinInt64x2", argLength: 2, commutative: true}, + {name: "MinMaskedInt64x2", argLength: 3, commutative: true}, {name: "MulEvenWidenInt64x2", argLength: 2, commutative: true}, + {name: "MulEvenWidenMaskedInt64x2", argLength: 3, commutative: true}, {name: "MulLowInt64x2", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt64x2", argLength: 3, 
commutative: true}, {name: "NotEqualInt64x2", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt64x2", argLength: 3, commutative: true}, {name: "OrInt64x2", argLength: 2, commutative: true}, + {name: "OrMaskedInt64x2", argLength: 3, commutative: true}, {name: "PopCountInt64x2", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt64x2", argLength: 2, commutative: false}, {name: "RotateLeftInt64x2", argLength: 2, commutative: false}, + {name: "RotateLeftMaskedInt64x2", argLength: 3, commutative: false}, {name: "RotateRightInt64x2", argLength: 2, commutative: false}, + {name: "RotateRightMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftAllRightSignExtendedInt64x2", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x2", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftRightInt64x2", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt64x2", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt64x2", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt64x2", argLength: 3, commutative: false}, {name: "SubInt64x2", argLength: 2, commutative: false}, + {name: "SubMaskedInt64x2", argLength: 3, commutative: false}, {name: "XorInt64x2", argLength: 2, commutative: true}, + {name: "XorMaskedInt64x2", argLength: 3, commutative: true}, {name: "AbsoluteInt64x4", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt64x4", argLength: 2, commutative: false}, {name: "AddInt64x4", argLength: 2, commutative: true}, + {name: "AddMaskedInt64x4", argLength: 3, commutative: true}, {name: "AndInt64x4", argLength: 2, commutative: true}, + {name: "AndMaskedInt64x4", argLength: 3, commutative: true}, {name: "AndNotInt64x4", argLength: 2, commutative: false}, + {name: "AndNotMaskedInt64x4", argLength: 3, commutative: false}, {name: "EqualInt64x4", argLength: 2, commutative: true}, + {name: "EqualMaskedInt64x4", argLength: 3, commutative: true}, {name: "GreaterInt64x4", argLength: 2, commutative: false}, {name: "GreaterEqualInt64x4", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt64x4", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt64x4", argLength: 3, commutative: false}, {name: "LessInt64x4", argLength: 2, commutative: false}, {name: "LessEqualInt64x4", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt64x4", argLength: 2, commutative: false}, - {name: "MaskedAddInt64x4", argLength: 3, commutative: true}, - {name: "MaskedAndInt64x4", argLength: 3, commutative: true}, - {name: "MaskedAndNotInt64x4", argLength: 3, commutative: false}, - {name: "MaskedEqualInt64x4", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt64x4", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt64x4", argLength: 3, 
commutative: false}, - {name: "MaskedLessInt64x4", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt64x4", argLength: 3, commutative: false}, - {name: "MaskedMaxInt64x4", argLength: 3, commutative: true}, - {name: "MaskedMinInt64x4", argLength: 3, commutative: true}, - {name: "MaskedMulEvenWidenInt64x4", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt64x4", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt64x4", argLength: 3, commutative: true}, - {name: "MaskedOrInt64x4", argLength: 3, commutative: true}, - {name: "MaskedPopCountInt64x4", argLength: 2, commutative: false}, - {name: "MaskedRotateLeftInt64x4", argLength: 3, commutative: false}, - {name: "MaskedRotateRightInt64x4", argLength: 3, commutative: false}, - {name: "MaskedShiftAllLeftInt64x4", argLength: 3, commutative: false}, - {name: "MaskedShiftAllRightInt64x4", argLength: 3, commutative: false}, - {name: "MaskedShiftAllRightSignExtendedInt64x4", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftInt64x4", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt64x4", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt64x4", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt64x4", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt64x4", argLength: 3, commutative: false}, - {name: "MaskedSubInt64x4", argLength: 3, commutative: false}, - {name: "MaskedXorInt64x4", argLength: 3, commutative: true}, + {name: "LessEqualMaskedInt64x4", argLength: 3, commutative: false}, + {name: "LessMaskedInt64x4", argLength: 3, commutative: false}, {name: "MaxInt64x4", argLength: 2, commutative: true}, + {name: "MaxMaskedInt64x4", argLength: 3, commutative: true}, {name: "MinInt64x4", argLength: 2, commutative: true}, + {name: "MinMaskedInt64x4", argLength: 3, commutative: true}, {name: "MulEvenWidenInt64x4", argLength: 2, commutative: true}, + {name: "MulEvenWidenMaskedInt64x4", argLength: 3, commutative: true}, {name: "MulLowInt64x4", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt64x4", argLength: 3, commutative: true}, {name: "NotEqualInt64x4", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt64x4", argLength: 3, commutative: true}, {name: "OrInt64x4", argLength: 2, commutative: true}, + {name: "OrMaskedInt64x4", argLength: 3, commutative: true}, {name: "PopCountInt64x4", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt64x4", argLength: 2, commutative: false}, {name: "RotateLeftInt64x4", argLength: 2, commutative: false}, + {name: "RotateLeftMaskedInt64x4", argLength: 3, commutative: false}, {name: "RotateRightInt64x4", argLength: 2, commutative: false}, + {name: "RotateRightMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftAllRightSignExtendedInt64x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x4", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt64x4", argLength: 3, commutative: false}, {name: 
"ShiftRightInt64x4", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt64x4", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt64x4", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt64x4", argLength: 3, commutative: false}, {name: "SubInt64x4", argLength: 2, commutative: false}, + {name: "SubMaskedInt64x4", argLength: 3, commutative: false}, {name: "XorInt64x4", argLength: 2, commutative: true}, + {name: "XorMaskedInt64x4", argLength: 3, commutative: true}, {name: "AbsoluteInt64x8", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt64x8", argLength: 2, commutative: false}, {name: "AddInt64x8", argLength: 2, commutative: true}, + {name: "AddMaskedInt64x8", argLength: 3, commutative: true}, {name: "AndInt64x8", argLength: 2, commutative: true}, + {name: "AndMaskedInt64x8", argLength: 3, commutative: true}, {name: "AndNotInt64x8", argLength: 2, commutative: false}, + {name: "AndNotMaskedInt64x8", argLength: 3, commutative: false}, {name: "EqualInt64x8", argLength: 2, commutative: true}, + {name: "EqualMaskedInt64x8", argLength: 3, commutative: true}, {name: "GreaterInt64x8", argLength: 2, commutative: false}, {name: "GreaterEqualInt64x8", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt64x8", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt64x8", argLength: 3, commutative: false}, {name: "LessInt64x8", argLength: 2, commutative: false}, {name: "LessEqualInt64x8", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt64x8", argLength: 2, commutative: false}, - {name: "MaskedAddInt64x8", argLength: 3, commutative: true}, - {name: "MaskedAndInt64x8", argLength: 3, commutative: true}, - {name: "MaskedAndNotInt64x8", argLength: 3, commutative: false}, - {name: "MaskedEqualInt64x8", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt64x8", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt64x8", argLength: 3, commutative: false}, - {name: "MaskedLessInt64x8", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt64x8", argLength: 3, commutative: false}, - {name: "MaskedMaxInt64x8", argLength: 3, commutative: true}, - {name: "MaskedMinInt64x8", argLength: 3, commutative: true}, - {name: "MaskedMulEvenWidenInt64x8", argLength: 3, commutative: true}, - {name: "MaskedMulLowInt64x8", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt64x8", argLength: 3, commutative: true}, - {name: "MaskedOrInt64x8", argLength: 3, commutative: true}, - {name: "MaskedPopCountInt64x8", argLength: 2, commutative: false}, - {name: "MaskedRotateLeftInt64x8", argLength: 3, commutative: false}, - {name: "MaskedRotateRightInt64x8", argLength: 3, commutative: false}, - {name: "MaskedShiftAllLeftInt64x8", argLength: 3, commutative: false}, - {name: "MaskedShiftAllRightInt64x8", argLength: 3, commutative: false}, - {name: "MaskedShiftAllRightSignExtendedInt64x8", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftInt64x8", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromInt64x8", argLength: 4, commutative: false}, - {name: "MaskedShiftRightInt64x8", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromInt64x8", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedInt64x8", argLength: 3, commutative: false}, - {name: 
"MaskedSubInt64x8", argLength: 3, commutative: false}, - {name: "MaskedXorInt64x8", argLength: 3, commutative: true}, + {name: "LessEqualMaskedInt64x8", argLength: 3, commutative: false}, + {name: "LessMaskedInt64x8", argLength: 3, commutative: false}, {name: "MaxInt64x8", argLength: 2, commutative: true}, + {name: "MaxMaskedInt64x8", argLength: 3, commutative: true}, {name: "MinInt64x8", argLength: 2, commutative: true}, + {name: "MinMaskedInt64x8", argLength: 3, commutative: true}, {name: "MulEvenWidenInt64x8", argLength: 2, commutative: true}, + {name: "MulEvenWidenMaskedInt64x8", argLength: 3, commutative: true}, {name: "MulLowInt64x8", argLength: 2, commutative: true}, + {name: "MulLowMaskedInt64x8", argLength: 3, commutative: true}, {name: "NotEqualInt64x8", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt64x8", argLength: 3, commutative: true}, {name: "OrInt64x8", argLength: 2, commutative: true}, + {name: "OrMaskedInt64x8", argLength: 3, commutative: true}, {name: "PopCountInt64x8", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt64x8", argLength: 2, commutative: false}, {name: "RotateLeftInt64x8", argLength: 2, commutative: false}, + {name: "RotateLeftMaskedInt64x8", argLength: 3, commutative: false}, {name: "RotateRightInt64x8", argLength: 2, commutative: false}, + {name: "RotateRightMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftAllRightSignExtendedInt64x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightSignExtendedMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftRightInt64x8", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromInt64x8", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedInt64x8", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedInt64x8", argLength: 3, commutative: false}, {name: "SubInt64x8", argLength: 2, commutative: false}, + {name: "SubMaskedInt64x8", argLength: 3, commutative: false}, {name: "XorInt64x8", argLength: 2, commutative: true}, + {name: "XorMaskedInt64x8", argLength: 3, commutative: true}, {name: "AbsoluteInt8x16", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt8x16", argLength: 2, commutative: false}, {name: "AddInt8x16", argLength: 2, commutative: true}, + {name: "AddMaskedInt8x16", argLength: 3, commutative: true}, {name: "AndInt8x16", argLength: 2, commutative: true}, {name: "AndNotInt8x16", argLength: 2, commutative: false}, {name: "EqualInt8x16", argLength: 2, commutative: true}, + {name: "EqualMaskedInt8x16", argLength: 3, commutative: true}, {name: "GreaterInt8x16", argLength: 2, commutative: false}, {name: "GreaterEqualInt8x16", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt8x16", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt8x16", argLength: 
3, commutative: false}, {name: "LessInt8x16", argLength: 2, commutative: false}, {name: "LessEqualInt8x16", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt8x16", argLength: 2, commutative: false}, - {name: "MaskedAddInt8x16", argLength: 3, commutative: true}, - {name: "MaskedEqualInt8x16", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt8x16", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt8x16", argLength: 3, commutative: false}, - {name: "MaskedLessInt8x16", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt8x16", argLength: 3, commutative: false}, - {name: "MaskedMaxInt8x16", argLength: 3, commutative: true}, - {name: "MaskedMinInt8x16", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt8x16", argLength: 3, commutative: true}, - {name: "MaskedPopCountInt8x16", argLength: 2, commutative: false}, - {name: "MaskedSaturatedAddInt8x16", argLength: 3, commutative: true}, - {name: "MaskedSaturatedSubInt8x16", argLength: 3, commutative: false}, - {name: "MaskedSubInt8x16", argLength: 3, commutative: false}, + {name: "LessEqualMaskedInt8x16", argLength: 3, commutative: false}, + {name: "LessMaskedInt8x16", argLength: 3, commutative: false}, {name: "MaxInt8x16", argLength: 2, commutative: true}, + {name: "MaxMaskedInt8x16", argLength: 3, commutative: true}, {name: "MinInt8x16", argLength: 2, commutative: true}, + {name: "MinMaskedInt8x16", argLength: 3, commutative: true}, {name: "NotEqualInt8x16", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt8x16", argLength: 3, commutative: true}, {name: "OrInt8x16", argLength: 2, commutative: true}, {name: "PopCountInt8x16", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt8x16", argLength: 2, commutative: false}, {name: "SaturatedAddInt8x16", argLength: 2, commutative: true}, + {name: "SaturatedAddMaskedInt8x16", argLength: 3, commutative: true}, {name: "SaturatedSubInt8x16", argLength: 2, commutative: false}, + {name: "SaturatedSubMaskedInt8x16", argLength: 3, commutative: false}, {name: "SignInt8x16", argLength: 2, commutative: false}, {name: "SubInt8x16", argLength: 2, commutative: false}, + {name: "SubMaskedInt8x16", argLength: 3, commutative: false}, {name: "XorInt8x16", argLength: 2, commutative: true}, {name: "AbsoluteInt8x32", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt8x32", argLength: 2, commutative: false}, {name: "AddInt8x32", argLength: 2, commutative: true}, + {name: "AddMaskedInt8x32", argLength: 3, commutative: true}, {name: "AndInt8x32", argLength: 2, commutative: true}, {name: "AndNotInt8x32", argLength: 2, commutative: false}, {name: "EqualInt8x32", argLength: 2, commutative: true}, + {name: "EqualMaskedInt8x32", argLength: 3, commutative: true}, {name: "GreaterInt8x32", argLength: 2, commutative: false}, {name: "GreaterEqualInt8x32", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt8x32", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt8x32", argLength: 3, commutative: false}, {name: "LessInt8x32", argLength: 2, commutative: false}, {name: "LessEqualInt8x32", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt8x32", argLength: 2, commutative: false}, - {name: "MaskedAddInt8x32", argLength: 3, commutative: true}, - {name: "MaskedEqualInt8x32", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt8x32", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt8x32", argLength: 3, commutative: false}, - {name: "MaskedLessInt8x32", argLength: 3, commutative: false}, - 
{name: "MaskedLessEqualInt8x32", argLength: 3, commutative: false}, - {name: "MaskedMaxInt8x32", argLength: 3, commutative: true}, - {name: "MaskedMinInt8x32", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt8x32", argLength: 3, commutative: true}, - {name: "MaskedPopCountInt8x32", argLength: 2, commutative: false}, - {name: "MaskedSaturatedAddInt8x32", argLength: 3, commutative: true}, - {name: "MaskedSaturatedSubInt8x32", argLength: 3, commutative: false}, - {name: "MaskedSubInt8x32", argLength: 3, commutative: false}, + {name: "LessEqualMaskedInt8x32", argLength: 3, commutative: false}, + {name: "LessMaskedInt8x32", argLength: 3, commutative: false}, {name: "MaxInt8x32", argLength: 2, commutative: true}, + {name: "MaxMaskedInt8x32", argLength: 3, commutative: true}, {name: "MinInt8x32", argLength: 2, commutative: true}, + {name: "MinMaskedInt8x32", argLength: 3, commutative: true}, {name: "NotEqualInt8x32", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt8x32", argLength: 3, commutative: true}, {name: "OrInt8x32", argLength: 2, commutative: true}, {name: "PopCountInt8x32", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt8x32", argLength: 2, commutative: false}, {name: "SaturatedAddInt8x32", argLength: 2, commutative: true}, + {name: "SaturatedAddMaskedInt8x32", argLength: 3, commutative: true}, {name: "SaturatedSubInt8x32", argLength: 2, commutative: false}, + {name: "SaturatedSubMaskedInt8x32", argLength: 3, commutative: false}, {name: "SignInt8x32", argLength: 2, commutative: false}, {name: "SubInt8x32", argLength: 2, commutative: false}, + {name: "SubMaskedInt8x32", argLength: 3, commutative: false}, {name: "XorInt8x32", argLength: 2, commutative: true}, {name: "AbsoluteInt8x64", argLength: 1, commutative: false}, + {name: "AbsoluteMaskedInt8x64", argLength: 2, commutative: false}, {name: "AddInt8x64", argLength: 2, commutative: true}, + {name: "AddMaskedInt8x64", argLength: 3, commutative: true}, {name: "EqualInt8x64", argLength: 2, commutative: true}, + {name: "EqualMaskedInt8x64", argLength: 3, commutative: true}, {name: "GreaterInt8x64", argLength: 2, commutative: false}, {name: "GreaterEqualInt8x64", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedInt8x64", argLength: 3, commutative: false}, + {name: "GreaterMaskedInt8x64", argLength: 3, commutative: false}, {name: "LessInt8x64", argLength: 2, commutative: false}, {name: "LessEqualInt8x64", argLength: 2, commutative: false}, - {name: "MaskedAbsoluteInt8x64", argLength: 2, commutative: false}, - {name: "MaskedAddInt8x64", argLength: 3, commutative: true}, - {name: "MaskedEqualInt8x64", argLength: 3, commutative: true}, - {name: "MaskedGreaterInt8x64", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualInt8x64", argLength: 3, commutative: false}, - {name: "MaskedLessInt8x64", argLength: 3, commutative: false}, - {name: "MaskedLessEqualInt8x64", argLength: 3, commutative: false}, - {name: "MaskedMaxInt8x64", argLength: 3, commutative: true}, - {name: "MaskedMinInt8x64", argLength: 3, commutative: true}, - {name: "MaskedNotEqualInt8x64", argLength: 3, commutative: true}, - {name: "MaskedPopCountInt8x64", argLength: 2, commutative: false}, - {name: "MaskedSaturatedAddInt8x64", argLength: 3, commutative: true}, - {name: "MaskedSaturatedSubInt8x64", argLength: 3, commutative: false}, - {name: "MaskedSubInt8x64", argLength: 3, commutative: false}, + {name: "LessEqualMaskedInt8x64", argLength: 3, commutative: false}, + {name: "LessMaskedInt8x64", argLength: 3, 
commutative: false}, {name: "MaxInt8x64", argLength: 2, commutative: true}, + {name: "MaxMaskedInt8x64", argLength: 3, commutative: true}, {name: "MinInt8x64", argLength: 2, commutative: true}, + {name: "MinMaskedInt8x64", argLength: 3, commutative: true}, {name: "NotEqualInt8x64", argLength: 2, commutative: true}, + {name: "NotEqualMaskedInt8x64", argLength: 3, commutative: true}, {name: "PopCountInt8x64", argLength: 1, commutative: false}, + {name: "PopCountMaskedInt8x64", argLength: 2, commutative: false}, {name: "SaturatedAddInt8x64", argLength: 2, commutative: true}, + {name: "SaturatedAddMaskedInt8x64", argLength: 3, commutative: true}, {name: "SaturatedSubInt8x64", argLength: 2, commutative: false}, + {name: "SaturatedSubMaskedInt8x64", argLength: 3, commutative: false}, {name: "SubInt8x64", argLength: 2, commutative: false}, + {name: "SubMaskedInt8x64", argLength: 3, commutative: false}, {name: "AddUint16x16", argLength: 2, commutative: true}, + {name: "AddMaskedUint16x16", argLength: 3, commutative: true}, {name: "AndUint16x16", argLength: 2, commutative: true}, {name: "AndNotUint16x16", argLength: 2, commutative: false}, {name: "AverageUint16x16", argLength: 2, commutative: true}, + {name: "AverageMaskedUint16x16", argLength: 3, commutative: true}, {name: "EqualUint16x16", argLength: 2, commutative: true}, + {name: "EqualMaskedUint16x16", argLength: 3, commutative: true}, {name: "GreaterUint16x16", argLength: 2, commutative: false}, {name: "GreaterEqualUint16x16", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedUint16x16", argLength: 3, commutative: false}, + {name: "GreaterMaskedUint16x16", argLength: 3, commutative: false}, {name: "LessUint16x16", argLength: 2, commutative: false}, {name: "LessEqualUint16x16", argLength: 2, commutative: false}, - {name: "MaskedAddUint16x16", argLength: 3, commutative: true}, - {name: "MaskedAverageUint16x16", argLength: 3, commutative: true}, - {name: "MaskedEqualUint16x16", argLength: 3, commutative: true}, - {name: "MaskedGreaterUint16x16", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualUint16x16", argLength: 3, commutative: false}, - {name: "MaskedLessUint16x16", argLength: 3, commutative: false}, - {name: "MaskedLessEqualUint16x16", argLength: 3, commutative: false}, - {name: "MaskedMaxUint16x16", argLength: 3, commutative: true}, - {name: "MaskedMinUint16x16", argLength: 3, commutative: true}, - {name: "MaskedMulHighUint16x16", argLength: 3, commutative: true}, - {name: "MaskedNotEqualUint16x16", argLength: 3, commutative: true}, - {name: "MaskedPopCountUint16x16", argLength: 2, commutative: false}, - {name: "MaskedSaturatedAddUint16x16", argLength: 3, commutative: true}, - {name: "MaskedSaturatedSubUint16x16", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftUint16x16", argLength: 3, commutative: false}, - {name: "MaskedShiftLeftAndFillUpperFromUint16x16", argLength: 4, commutative: false}, - {name: "MaskedShiftRightUint16x16", argLength: 3, commutative: false}, - {name: "MaskedShiftRightAndFillUpperFromUint16x16", argLength: 4, commutative: false}, - {name: "MaskedShiftRightSignExtendedUint16x16", argLength: 3, commutative: false}, - {name: "MaskedSubUint16x16", argLength: 3, commutative: false}, + {name: "LessEqualMaskedUint16x16", argLength: 3, commutative: false}, + {name: "LessMaskedUint16x16", argLength: 3, commutative: false}, {name: "MaxUint16x16", argLength: 2, commutative: true}, + {name: "MaxMaskedUint16x16", argLength: 3, commutative: true}, {name: "MinUint16x16", argLength: 2, 
commutative: true}, + {name: "MinMaskedUint16x16", argLength: 3, commutative: true}, {name: "MulHighUint16x16", argLength: 2, commutative: true}, + {name: "MulHighMaskedUint16x16", argLength: 3, commutative: true}, {name: "NotEqualUint16x16", argLength: 2, commutative: true}, + {name: "NotEqualMaskedUint16x16", argLength: 3, commutative: true}, {name: "OrUint16x16", argLength: 2, commutative: true}, {name: "PairwiseAddUint16x16", argLength: 2, commutative: false}, {name: "PairwiseSubUint16x16", argLength: 2, commutative: false}, {name: "PopCountUint16x16", argLength: 1, commutative: false}, + {name: "PopCountMaskedUint16x16", argLength: 2, commutative: false}, {name: "SaturatedAddUint16x16", argLength: 2, commutative: true}, + {name: "SaturatedAddMaskedUint16x16", argLength: 3, commutative: true}, {name: "SaturatedSubUint16x16", argLength: 2, commutative: false}, + {name: "SaturatedSubMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false}, {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false}, {name: "ShiftLeftUint16x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false}, + {name: "ShiftLeftAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false}, + {name: "ShiftLeftMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftRightUint16x16", argLength: 2, commutative: false}, {name: "ShiftRightAndFillUpperFromUint16x16", argLength: 3, commutative: false}, + {name: "ShiftRightAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false}, + {name: "ShiftRightMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftRightSignExtendedUint16x16", argLength: 2, commutative: false}, + {name: "ShiftRightSignExtendedMaskedUint16x16", argLength: 3, commutative: false}, {name: "SubUint16x16", argLength: 2, commutative: false}, + {name: "SubMaskedUint16x16", argLength: 3, commutative: false}, {name: "XorUint16x16", argLength: 2, commutative: true}, {name: "AddUint16x32", argLength: 2, commutative: true}, + {name: "AddMaskedUint16x32", argLength: 3, commutative: true}, {name: "AverageUint16x32", argLength: 2, commutative: true}, + {name: "AverageMaskedUint16x32", argLength: 3, commutative: true}, {name: "EqualUint16x32", argLength: 2, commutative: true}, + {name: "EqualMaskedUint16x32", argLength: 3, commutative: true}, {name: "GreaterUint16x32", argLength: 2, commutative: false}, {name: "GreaterEqualUint16x32", argLength: 2, commutative: false}, + {name: "GreaterEqualMaskedUint16x32", argLength: 3, commutative: false}, + {name: "GreaterMaskedUint16x32", argLength: 3, commutative: false}, {name: "LessUint16x32", argLength: 2, commutative: false}, {name: "LessEqualUint16x32", argLength: 2, commutative: false}, - {name: "MaskedAddUint16x32", argLength: 3, commutative: true}, - {name: "MaskedAverageUint16x32", argLength: 3, commutative: true}, - {name: "MaskedEqualUint16x32", argLength: 3, commutative: true}, - {name: "MaskedGreaterUint16x32", argLength: 3, commutative: false}, - {name: "MaskedGreaterEqualUint16x32", argLength: 3, commutative: false}, - {name: "MaskedLessUint16x32", argLength: 3, commutative: false}, - {name: "MaskedLessEqualUint16x32", argLength: 3, commutative: false}, - {name: "MaskedMaxUint16x32", argLength: 3, commutative: true}, - {name: "MaskedMinUint16x32", argLength: 3, commutative: true}, - {name: "MaskedMulHighUint16x32", argLength: 3, commutative: true}, - {name: "MaskedNotEqualUint16x32", argLength: 3, 
-	{name: "MaskedPopCountUint16x32", argLength: 2, commutative: false},
-	{name: "MaskedSaturatedAddUint16x32", argLength: 3, commutative: true},
-	{name: "MaskedSaturatedSubUint16x32", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftUint16x32", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftAndFillUpperFromUint16x32", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightUint16x32", argLength: 3, commutative: false},
-	{name: "MaskedShiftRightAndFillUpperFromUint16x32", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightSignExtendedUint16x32", argLength: 3, commutative: false},
-	{name: "MaskedSubUint16x32", argLength: 3, commutative: false},
+	{name: "LessEqualMaskedUint16x32", argLength: 3, commutative: false},
+	{name: "LessMaskedUint16x32", argLength: 3, commutative: false},
 	{name: "MaxUint16x32", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint16x32", argLength: 3, commutative: true},
 	{name: "MinUint16x32", argLength: 2, commutative: true},
+	{name: "MinMaskedUint16x32", argLength: 3, commutative: true},
 	{name: "MulHighUint16x32", argLength: 2, commutative: true},
+	{name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
 	{name: "NotEqualUint16x32", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true},
 	{name: "PopCountUint16x32", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint16x32", argLength: 2, commutative: false},
 	{name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
+	{name: "SaturatedAddMaskedUint16x32", argLength: 3, commutative: true},
 	{name: "SaturatedSubUint16x32", argLength: 2, commutative: false},
+	{name: "SaturatedSubMaskedUint16x32", argLength: 3, commutative: false},
 	{name: "ShiftLeftUint16x32", argLength: 2, commutative: false},
 	{name: "ShiftLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false},
+	{name: "ShiftLeftAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false},
+	{name: "ShiftLeftMaskedUint16x32", argLength: 3, commutative: false},
 	{name: "ShiftRightUint16x32", argLength: 2, commutative: false},
 	{name: "ShiftRightAndFillUpperFromUint16x32", argLength: 3, commutative: false},
+	{name: "ShiftRightAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false},
+	{name: "ShiftRightMaskedUint16x32", argLength: 3, commutative: false},
 	{name: "ShiftRightSignExtendedUint16x32", argLength: 2, commutative: false},
+	{name: "ShiftRightSignExtendedMaskedUint16x32", argLength: 3, commutative: false},
 	{name: "SubUint16x32", argLength: 2, commutative: false},
+	{name: "SubMaskedUint16x32", argLength: 3, commutative: false},
 	{name: "AddUint16x8", argLength: 2, commutative: true},
+	{name: "AddMaskedUint16x8", argLength: 3, commutative: true},
 	{name: "AndUint16x8", argLength: 2, commutative: true},
 	{name: "AndNotUint16x8", argLength: 2, commutative: false},
 	{name: "AverageUint16x8", argLength: 2, commutative: true},
+	{name: "AverageMaskedUint16x8", argLength: 3, commutative: true},
 	{name: "EqualUint16x8", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint16x8", argLength: 3, commutative: true},
 	{name: "GreaterUint16x8", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint16x8", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint16x8", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint16x8", argLength: 3, commutative: false},
 	{name: "LessUint16x8", argLength: 2, commutative: false},
 	{name: "LessEqualUint16x8", argLength: 2, commutative: false},
-	{name: "MaskedAddUint16x8", argLength: 3, commutative: true},
-	{name: "MaskedAverageUint16x8", argLength: 3, commutative: true},
-	{name: "MaskedEqualUint16x8", argLength: 3, commutative: true},
-	{name: "MaskedGreaterUint16x8", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint16x8", argLength: 3, commutative: false},
-	{name: "MaskedLessUint16x8", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint16x8", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint16x8", argLength: 3, commutative: true},
-	{name: "MaskedMinUint16x8", argLength: 3, commutative: true},
-	{name: "MaskedMulHighUint16x8", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint16x8", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint16x8", argLength: 2, commutative: false},
-	{name: "MaskedSaturatedAddUint16x8", argLength: 3, commutative: true},
-	{name: "MaskedSaturatedSubUint16x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftUint16x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftAndFillUpperFromUint16x8", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightUint16x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftRightAndFillUpperFromUint16x8", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightSignExtendedUint16x8", argLength: 3, commutative: false},
-	{name: "MaskedSubUint16x8", argLength: 3, commutative: false},
+	{name: "LessEqualMaskedUint16x8", argLength: 3, commutative: false},
+	{name: "LessMaskedUint16x8", argLength: 3, commutative: false},
 	{name: "MaxUint16x8", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint16x8", argLength: 3, commutative: true},
 	{name: "MinUint16x8", argLength: 2, commutative: true},
+	{name: "MinMaskedUint16x8", argLength: 3, commutative: true},
 	{name: "MulHighUint16x8", argLength: 2, commutative: true},
+	{name: "MulHighMaskedUint16x8", argLength: 3, commutative: true},
 	{name: "NotEqualUint16x8", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint16x8", argLength: 3, commutative: true},
 	{name: "OrUint16x8", argLength: 2, commutative: true},
 	{name: "PairwiseAddUint16x8", argLength: 2, commutative: false},
 	{name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
 	{name: "PopCountUint16x8", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
 	{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
+	{name: "SaturatedAddMaskedUint16x8", argLength: 3, commutative: true},
 	{name: "SaturatedSubUint16x8", argLength: 2, commutative: false},
+	{name: "SaturatedSubMaskedUint16x8", argLength: 3, commutative: false},
 	{name: "ShiftAllLeftUint16x8", argLength: 2, commutative: false},
 	{name: "ShiftAllRightUint16x8", argLength: 2, commutative: false},
 	{name: "ShiftLeftUint16x8", argLength: 2, commutative: false},
 	{name: "ShiftLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false},
+	{name: "ShiftLeftAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false},
+	{name: "ShiftLeftMaskedUint16x8", argLength: 3, commutative: false},
 	{name: "ShiftRightUint16x8", argLength: 2, commutative: false},
 	{name: "ShiftRightAndFillUpperFromUint16x8", argLength: 3, commutative: false},
+	{name: "ShiftRightAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false},
+	{name: "ShiftRightMaskedUint16x8", argLength: 3, commutative: false},
 	{name: "ShiftRightSignExtendedUint16x8", argLength: 2, commutative: false},
+	{name: "ShiftRightSignExtendedMaskedUint16x8", argLength: 3, commutative: false},
 	{name: "SubUint16x8", argLength: 2, commutative: false},
+	{name: "SubMaskedUint16x8", argLength: 3, commutative: false},
 	{name: "XorUint16x8", argLength: 2, commutative: true},
 	{name: "AddUint32x16", argLength: 2, commutative: true},
+	{name: "AddMaskedUint32x16", argLength: 3, commutative: true},
 	{name: "AndUint32x16", argLength: 2, commutative: true},
+	{name: "AndMaskedUint32x16", argLength: 3, commutative: true},
 	{name: "AndNotUint32x16", argLength: 2, commutative: false},
+	{name: "AndNotMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "EqualUint32x16", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint32x16", argLength: 3, commutative: true},
 	{name: "GreaterUint32x16", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint32x16", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint32x16", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "LessUint32x16", argLength: 2, commutative: false},
 	{name: "LessEqualUint32x16", argLength: 2, commutative: false},
-	{name: "MaskedAddUint32x16", argLength: 3, commutative: true},
-	{name: "MaskedAndUint32x16", argLength: 3, commutative: true},
-	{name: "MaskedAndNotUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedEqualUint32x16", argLength: 3, commutative: true},
-	{name: "MaskedGreaterUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedLessUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint32x16", argLength: 3, commutative: true},
-	{name: "MaskedMinUint32x16", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint32x16", argLength: 3, commutative: true},
-	{name: "MaskedOrUint32x16", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint32x16", argLength: 2, commutative: false},
-	{name: "MaskedRotateLeftUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedRotateRightUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false},
-	{name: "MaskedShiftLeftUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftAndFillUpperFromUint32x16", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedShiftRightAndFillUpperFromUint32x16", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightSignExtendedUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedSubUint32x16", argLength: 3, commutative: false},
-	{name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 4, commutative: false},
-	{name: "MaskedXorUint32x16", argLength: 3, commutative: true},
+	{name: "LessEqualMaskedUint32x16", argLength: 3, commutative: false},
+	{name: "LessMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "MaxUint32x16", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint32x16", argLength: 3, commutative: true},
 	{name: "MinUint32x16", argLength: 2, commutative: true},
+	{name: "MinMaskedUint32x16", argLength: 3, commutative: true},
 	{name: "NotEqualUint32x16", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
 	{name: "OrUint32x16", argLength: 2, commutative: true},
+	{name: "OrMaskedUint32x16", argLength: 3, commutative: true},
 	{name: "PopCountUint32x16", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
 	{name: "RotateLeftUint32x16", argLength: 2, commutative: false},
+	{name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "RotateRightUint32x16", argLength: 2, commutative: false},
+	{name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
+	{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false},
 	{name: "ShiftLeftUint32x16", argLength: 2, commutative: false},
 	{name: "ShiftLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false},
+	{name: "ShiftLeftAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false},
+	{name: "ShiftLeftMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "ShiftRightUint32x16", argLength: 2, commutative: false},
 	{name: "ShiftRightAndFillUpperFromUint32x16", argLength: 3, commutative: false},
+	{name: "ShiftRightAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false},
+	{name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "ShiftRightSignExtendedUint32x16", argLength: 2, commutative: false},
+	{name: "ShiftRightSignExtendedMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "SubUint32x16", argLength: 2, commutative: false},
+	{name: "SubMaskedUint32x16", argLength: 3, commutative: false},
 	{name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false},
+	{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false},
 	{name: "XorUint32x16", argLength: 2, commutative: true},
+	{name: "XorMaskedUint32x16", argLength: 3, commutative: true},
 	{name: "AddUint32x4", argLength: 2, commutative: true},
+	{name: "AddMaskedUint32x4", argLength: 3, commutative: true},
 	{name: "AndUint32x4", argLength: 2, commutative: true},
+	{name: "AndMaskedUint32x4", argLength: 3, commutative: true},
 	{name: "AndNotUint32x4", argLength: 2, commutative: false},
+	{name: "AndNotMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "EqualUint32x4", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint32x4", argLength: 3, commutative: true},
 	{name: "GreaterUint32x4", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint32x4", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint32x4", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "LessUint32x4", argLength: 2, commutative: false},
 	{name: "LessEqualUint32x4", argLength: 2, commutative: false},
-	{name: "MaskedAddUint32x4", argLength: 3, commutative: true},
-	{name: "MaskedAndUint32x4", argLength: 3, commutative: true},
-	{name: "MaskedAndNotUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedEqualUint32x4", argLength: 3, commutative: true},
-	{name: "MaskedGreaterUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedLessUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint32x4", argLength: 3, commutative: true},
-	{name: "MaskedMinUint32x4", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint32x4", argLength: 3, commutative: true},
-	{name: "MaskedOrUint32x4", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint32x4", argLength: 2, commutative: false},
-	{name: "MaskedRotateLeftUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedRotateRightUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false},
-	{name: "MaskedShiftLeftUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftAndFillUpperFromUint32x4", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedShiftRightAndFillUpperFromUint32x4", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightSignExtendedUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedSubUint32x4", argLength: 3, commutative: false},
-	{name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 4, commutative: false},
-	{name: "MaskedXorUint32x4", argLength: 3, commutative: true},
+	{name: "LessEqualMaskedUint32x4", argLength: 3, commutative: false},
+	{name: "LessMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "MaxUint32x4", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint32x4", argLength: 3, commutative: true},
 	{name: "MinUint32x4", argLength: 2, commutative: true},
+	{name: "MinMaskedUint32x4", argLength: 3, commutative: true},
 	{name: "MulEvenWidenUint32x4", argLength: 2, commutative: true},
 	{name: "NotEqualUint32x4", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint32x4", argLength: 3, commutative: true},
 	{name: "OrUint32x4", argLength: 2, commutative: true},
+	{name: "OrMaskedUint32x4", argLength: 3, commutative: true},
 	{name: "PairwiseAddUint32x4", argLength: 2, commutative: false},
 	{name: "PairwiseSubUint32x4", argLength: 2, commutative: false},
 	{name: "PopCountUint32x4", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint32x4", argLength: 2, commutative: false},
 	{name: "RotateLeftUint32x4", argLength: 2, commutative: false},
+	{name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "RotateRightUint32x4", argLength: 2, commutative: false},
+	{name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
+	{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false},
 	{name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false},
 	{name: "ShiftAllRightUint32x4", argLength: 2, commutative: false},
 	{name: "ShiftLeftUint32x4", argLength: 2, commutative: false},
 	{name: "ShiftLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false},
+	{name: "ShiftLeftAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false},
+	{name: "ShiftLeftMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "ShiftRightUint32x4", argLength: 2, commutative: false},
 	{name: "ShiftRightAndFillUpperFromUint32x4", argLength: 3, commutative: false},
+	{name: "ShiftRightAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false},
+	{name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "ShiftRightSignExtendedUint32x4", argLength: 2, commutative: false},
+	{name: "ShiftRightSignExtendedMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "SubUint32x4", argLength: 2, commutative: false},
+	{name: "SubMaskedUint32x4", argLength: 3, commutative: false},
 	{name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false},
+	{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false},
 	{name: "XorUint32x4", argLength: 2, commutative: true},
+	{name: "XorMaskedUint32x4", argLength: 3, commutative: true},
 	{name: "AddUint32x8", argLength: 2, commutative: true},
+	{name: "AddMaskedUint32x8", argLength: 3, commutative: true},
 	{name: "AndUint32x8", argLength: 2, commutative: true},
+	{name: "AndMaskedUint32x8", argLength: 3, commutative: true},
 	{name: "AndNotUint32x8", argLength: 2, commutative: false},
+	{name: "AndNotMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "EqualUint32x8", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint32x8", argLength: 3, commutative: true},
 	{name: "GreaterUint32x8", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint32x8", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint32x8", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "LessUint32x8", argLength: 2, commutative: false},
 	{name: "LessEqualUint32x8", argLength: 2, commutative: false},
-	{name: "MaskedAddUint32x8", argLength: 3, commutative: true},
-	{name: "MaskedAndUint32x8", argLength: 3, commutative: true},
-	{name: "MaskedAndNotUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedEqualUint32x8", argLength: 3, commutative: true},
-	{name: "MaskedGreaterUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedLessUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint32x8", argLength: 3, commutative: true},
-	{name: "MaskedMinUint32x8", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint32x8", argLength: 3, commutative: true},
-	{name: "MaskedOrUint32x8", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint32x8", argLength: 2, commutative: false},
-	{name: "MaskedRotateLeftUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedRotateRightUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false},
-	{name: "MaskedShiftLeftUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftAndFillUpperFromUint32x8", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftRightAndFillUpperFromUint32x8", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightSignExtendedUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedSubUint32x8", argLength: 3, commutative: false},
-	{name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 4, commutative: false},
-	{name: "MaskedXorUint32x8", argLength: 3, commutative: true},
+	{name: "LessEqualMaskedUint32x8", argLength: 3, commutative: false},
+	{name: "LessMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "MaxUint32x8", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint32x8", argLength: 3, commutative: true},
 	{name: "MinUint32x8", argLength: 2, commutative: true},
+	{name: "MinMaskedUint32x8", argLength: 3, commutative: true},
 	{name: "MulEvenWidenUint32x8", argLength: 2, commutative: true},
 	{name: "NotEqualUint32x8", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint32x8", argLength: 3, commutative: true},
 	{name: "OrUint32x8", argLength: 2, commutative: true},
+	{name: "OrMaskedUint32x8", argLength: 3, commutative: true},
 	{name: "PairwiseAddUint32x8", argLength: 2, commutative: false},
 	{name: "PairwiseSubUint32x8", argLength: 2, commutative: false},
 	{name: "PopCountUint32x8", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint32x8", argLength: 2, commutative: false},
 	{name: "RotateLeftUint32x8", argLength: 2, commutative: false},
+	{name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "RotateRightUint32x8", argLength: 2, commutative: false},
+	{name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
+	{name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false},
 	{name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false},
 	{name: "ShiftAllRightUint32x8", argLength: 2, commutative: false},
 	{name: "ShiftLeftUint32x8", argLength: 2, commutative: false},
 	{name: "ShiftLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false},
+	{name: "ShiftLeftAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false},
+	{name: "ShiftLeftMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "ShiftRightUint32x8", argLength: 2, commutative: false},
 	{name: "ShiftRightAndFillUpperFromUint32x8", argLength: 3, commutative: false},
+	{name: "ShiftRightAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false},
+	{name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "ShiftRightSignExtendedUint32x8", argLength: 2, commutative: false},
+	{name: "ShiftRightSignExtendedMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "SubUint32x8", argLength: 2, commutative: false},
+	{name: "SubMaskedUint32x8", argLength: 3, commutative: false},
 	{name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false},
+	{name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false},
 	{name: "XorUint32x8", argLength: 2, commutative: true},
+	{name: "XorMaskedUint32x8", argLength: 3, commutative: true},
 	{name: "AddUint64x2", argLength: 2, commutative: true},
+	{name: "AddMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "AndUint64x2", argLength: 2, commutative: true},
+	{name: "AndMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "AndNotUint64x2", argLength: 2, commutative: false},
+	{name: "AndNotMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "EqualUint64x2", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "GreaterUint64x2", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint64x2", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "LessUint64x2", argLength: 2, commutative: false},
 	{name: "LessEqualUint64x2", argLength: 2, commutative: false},
-	{name: "MaskedAddUint64x2", argLength: 3, commutative: true},
-	{name: "MaskedAndUint64x2", argLength: 3, commutative: true},
-	{name: "MaskedAndNotUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedEqualUint64x2", argLength: 3, commutative: true},
-	{name: "MaskedGreaterUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedLessUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint64x2", argLength: 3, commutative: true},
-	{name: "MaskedMinUint64x2", argLength: 3, commutative: true},
-	{name: "MaskedMulEvenWidenUint64x2", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint64x2", argLength: 3, commutative: true},
-	{name: "MaskedOrUint64x2", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint64x2", argLength: 2, commutative: false},
-	{name: "MaskedRotateLeftUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedRotateRightUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedShiftAllLeftUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedShiftAllRightUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftAndFillUpperFromUint64x2", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedShiftRightAndFillUpperFromUint64x2", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightSignExtendedUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedSubUint64x2", argLength: 3, commutative: false},
-	{name: "MaskedXorUint64x2", argLength: 3, commutative: true},
+	{name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false},
+	{name: "LessMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "MaxUint64x2", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "MinUint64x2", argLength: 2, commutative: true},
+	{name: "MinMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "MulEvenWidenUint64x2", argLength: 2, commutative: true},
+	{name: "MulEvenWidenMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "NotEqualUint64x2", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "OrUint64x2", argLength: 2, commutative: true},
+	{name: "OrMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "PopCountUint64x2", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint64x2", argLength: 2, commutative: false},
 	{name: "RotateLeftUint64x2", argLength: 2, commutative: false},
+	{name: "RotateLeftMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "RotateRightUint64x2", argLength: 2, commutative: false},
+	{name: "RotateRightMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "ShiftAllLeftUint64x2", argLength: 2, commutative: false},
+	{name: "ShiftAllLeftMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "ShiftAllRightUint64x2", argLength: 2, commutative: false},
+	{name: "ShiftAllRightMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "ShiftLeftUint64x2", argLength: 2, commutative: false},
 	{name: "ShiftLeftAndFillUpperFromUint64x2", argLength: 3, commutative: false},
+	{name: "ShiftLeftAndFillUpperFromMaskedUint64x2", argLength: 4, commutative: false},
+	{name: "ShiftLeftMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "ShiftRightUint64x2", argLength: 2, commutative: false},
 	{name: "ShiftRightAndFillUpperFromUint64x2", argLength: 3, commutative: false},
+	{name: "ShiftRightAndFillUpperFromMaskedUint64x2", argLength: 4, commutative: false},
+	{name: "ShiftRightMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "ShiftRightSignExtendedUint64x2", argLength: 2, commutative: false},
+	{name: "ShiftRightSignExtendedMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "SubUint64x2", argLength: 2, commutative: false},
+	{name: "SubMaskedUint64x2", argLength: 3, commutative: false},
 	{name: "XorUint64x2", argLength: 2, commutative: true},
+	{name: "XorMaskedUint64x2", argLength: 3, commutative: true},
 	{name: "AddUint64x4", argLength: 2, commutative: true},
+	{name: "AddMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "AndUint64x4", argLength: 2, commutative: true},
+	{name: "AndMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "AndNotUint64x4", argLength: 2, commutative: false},
+	{name: "AndNotMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "EqualUint64x4", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "GreaterUint64x4", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint64x4", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "LessUint64x4", argLength: 2, commutative: false},
 	{name: "LessEqualUint64x4", argLength: 2, commutative: false},
-	{name: "MaskedAddUint64x4", argLength: 3, commutative: true},
-	{name: "MaskedAndUint64x4", argLength: 3, commutative: true},
-	{name: "MaskedAndNotUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedEqualUint64x4", argLength: 3, commutative: true},
-	{name: "MaskedGreaterUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedLessUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint64x4", argLength: 3, commutative: true},
-	{name: "MaskedMinUint64x4", argLength: 3, commutative: true},
-	{name: "MaskedMulEvenWidenUint64x4", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint64x4", argLength: 3, commutative: true},
-	{name: "MaskedOrUint64x4", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint64x4", argLength: 2, commutative: false},
-	{name: "MaskedRotateLeftUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedRotateRightUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedShiftAllLeftUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedShiftAllRightUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftAndFillUpperFromUint64x4", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedShiftRightAndFillUpperFromUint64x4", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightSignExtendedUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedSubUint64x4", argLength: 3, commutative: false},
-	{name: "MaskedXorUint64x4", argLength: 3, commutative: true},
+	{name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false},
+	{name: "LessMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "MaxUint64x4", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "MinUint64x4", argLength: 2, commutative: true},
+	{name: "MinMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "MulEvenWidenUint64x4", argLength: 2, commutative: true},
+	{name: "MulEvenWidenMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "NotEqualUint64x4", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "OrUint64x4", argLength: 2, commutative: true},
+	{name: "OrMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "PopCountUint64x4", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
 	{name: "RotateLeftUint64x4", argLength: 2, commutative: false},
+	{name: "RotateLeftMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "RotateRightUint64x4", argLength: 2, commutative: false},
+	{name: "RotateRightMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "ShiftAllLeftUint64x4", argLength: 2, commutative: false},
+	{name: "ShiftAllLeftMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "ShiftAllRightUint64x4", argLength: 2, commutative: false},
+	{name: "ShiftAllRightMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "ShiftLeftUint64x4", argLength: 2, commutative: false},
 	{name: "ShiftLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false},
+	{name: "ShiftLeftAndFillUpperFromMaskedUint64x4", argLength: 4, commutative: false},
+	{name: "ShiftLeftMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "ShiftRightUint64x4", argLength: 2, commutative: false},
 	{name: "ShiftRightAndFillUpperFromUint64x4", argLength: 3, commutative: false},
+	{name: "ShiftRightAndFillUpperFromMaskedUint64x4", argLength: 4, commutative: false},
+	{name: "ShiftRightMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "ShiftRightSignExtendedUint64x4", argLength: 2, commutative: false},
+	{name: "ShiftRightSignExtendedMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "SubUint64x4", argLength: 2, commutative: false},
+	{name: "SubMaskedUint64x4", argLength: 3, commutative: false},
 	{name: "XorUint64x4", argLength: 2, commutative: true},
+	{name: "XorMaskedUint64x4", argLength: 3, commutative: true},
 	{name: "AddUint64x8", argLength: 2, commutative: true},
+	{name: "AddMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "AndUint64x8", argLength: 2, commutative: true},
+	{name: "AndMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "AndNotUint64x8", argLength: 2, commutative: false},
+	{name: "AndNotMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "EqualUint64x8", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "GreaterUint64x8", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint64x8", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "LessUint64x8", argLength: 2, commutative: false},
 	{name: "LessEqualUint64x8", argLength: 2, commutative: false},
-	{name: "MaskedAddUint64x8", argLength: 3, commutative: true},
-	{name: "MaskedAndUint64x8", argLength: 3, commutative: true},
-	{name: "MaskedAndNotUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedEqualUint64x8", argLength: 3, commutative: true},
-	{name: "MaskedGreaterUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedLessUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint64x8", argLength: 3, commutative: true},
-	{name: "MaskedMinUint64x8", argLength: 3, commutative: true},
-	{name: "MaskedMulEvenWidenUint64x8", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint64x8", argLength: 3, commutative: true},
-	{name: "MaskedOrUint64x8", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint64x8", argLength: 2, commutative: false},
-	{name: "MaskedRotateLeftUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedRotateRightUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftAllLeftUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftAllRightUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftLeftAndFillUpperFromUint64x8", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedShiftRightAndFillUpperFromUint64x8", argLength: 4, commutative: false},
-	{name: "MaskedShiftRightSignExtendedUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedSubUint64x8", argLength: 3, commutative: false},
-	{name: "MaskedXorUint64x8", argLength: 3, commutative: true},
+	{name: "LessEqualMaskedUint64x8", argLength: 3, commutative: false},
+	{name: "LessMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "MaxUint64x8", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "MinUint64x8", argLength: 2, commutative: true},
+	{name: "MinMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "MulEvenWidenUint64x8", argLength: 2, commutative: true},
+	{name: "MulEvenWidenMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "NotEqualUint64x8", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "OrUint64x8", argLength: 2, commutative: true},
+	{name: "OrMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "PopCountUint64x8", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
 	{name: "RotateLeftUint64x8", argLength: 2, commutative: false},
+	{name: "RotateLeftMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "RotateRightUint64x8", argLength: 2, commutative: false},
+	{name: "RotateRightMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "ShiftAllLeftUint64x8", argLength: 2, commutative: false},
+	{name: "ShiftAllLeftMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "ShiftAllRightUint64x8", argLength: 2, commutative: false},
+	{name: "ShiftAllRightMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "ShiftLeftUint64x8", argLength: 2, commutative: false},
 	{name: "ShiftLeftAndFillUpperFromUint64x8", argLength: 3, commutative: false},
+	{name: "ShiftLeftAndFillUpperFromMaskedUint64x8", argLength: 4, commutative: false},
+	{name: "ShiftLeftMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "ShiftRightUint64x8", argLength: 2, commutative: false},
 	{name: "ShiftRightAndFillUpperFromUint64x8", argLength: 3, commutative: false},
+	{name: "ShiftRightAndFillUpperFromMaskedUint64x8", argLength: 4, commutative: false},
+	{name: "ShiftRightMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "ShiftRightSignExtendedUint64x8", argLength: 2, commutative: false},
+	{name: "ShiftRightSignExtendedMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "SubUint64x8", argLength: 2, commutative: false},
+	{name: "SubMaskedUint64x8", argLength: 3, commutative: false},
 	{name: "XorUint64x8", argLength: 2, commutative: true},
+	{name: "XorMaskedUint64x8", argLength: 3, commutative: true},
 	{name: "AddUint8x16", argLength: 2, commutative: true},
+	{name: "AddMaskedUint8x16", argLength: 3, commutative: true},
 	{name: "AndUint8x16", argLength: 2, commutative: true},
 	{name: "AndNotUint8x16", argLength: 2, commutative: false},
 	{name: "AverageUint8x16", argLength: 2, commutative: true},
+	{name: "AverageMaskedUint8x16", argLength: 3, commutative: true},
 	{name: "EqualUint8x16", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint8x16", argLength: 3, commutative: true},
 	{name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false},
+	{name: "GaloisFieldMulMaskedUint8x16", argLength: 3, commutative: false},
 	{name: "GreaterUint8x16", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint8x16", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint8x16", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint8x16", argLength: 3, commutative: false},
 	{name: "LessUint8x16", argLength: 2, commutative: false},
 	{name: "LessEqualUint8x16", argLength: 2, commutative: false},
-	{name: "MaskedAddUint8x16", argLength: 3, commutative: true},
-	{name: "MaskedAverageUint8x16", argLength: 3, commutative: true},
-	{name: "MaskedEqualUint8x16", argLength: 3, commutative: true},
-	{name: "MaskedGaloisFieldMulUint8x16", argLength: 3, commutative: false},
-	{name: "MaskedGreaterUint8x16", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint8x16", argLength: 3, commutative: false},
-	{name: "MaskedLessUint8x16", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint8x16", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint8x16", argLength: 3, commutative: true},
-	{name: "MaskedMinUint8x16", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint8x16", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint8x16", argLength: 2, commutative: false},
-	{name: "MaskedSaturatedAddUint8x16", argLength: 3, commutative: true},
-	{name: "MaskedSaturatedSubUint8x16", argLength: 3, commutative: false},
-	{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x16", argLength: 3, commutative: false},
-	{name: "MaskedSubUint8x16", argLength: 3, commutative: false},
+	{name: "LessEqualMaskedUint8x16", argLength: 3, commutative: false},
+	{name: "LessMaskedUint8x16", argLength: 3, commutative: false},
 	{name: "MaxUint8x16", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint8x16", argLength: 3, commutative: true},
 	{name: "MinUint8x16", argLength: 2, commutative: true},
+	{name: "MinMaskedUint8x16", argLength: 3, commutative: true},
 	{name: "NotEqualUint8x16", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint8x16", argLength: 3, commutative: true},
 	{name: "OrUint8x16", argLength: 2, commutative: true},
 	{name: "PopCountUint8x16", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint8x16", argLength: 2, commutative: false},
 	{name: "SaturatedAddUint8x16", argLength: 2, commutative: true},
+	{name: "SaturatedAddMaskedUint8x16", argLength: 3, commutative: true},
 	{name: "SaturatedSubUint8x16", argLength: 2, commutative: false},
+	{name: "SaturatedSubMaskedUint8x16", argLength: 3, commutative: false},
 	{name: "SaturatedUnsignedSignedPairDotProdUint8x16", argLength: 2, commutative: false},
+	{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x16", argLength: 3, commutative: false},
 	{name: "SubUint8x16", argLength: 2, commutative: false},
+	{name: "SubMaskedUint8x16", argLength: 3, commutative: false},
 	{name: "XorUint8x16", argLength: 2, commutative: true},
 	{name: "AddUint8x32", argLength: 2, commutative: true},
+	{name: "AddMaskedUint8x32", argLength: 3, commutative: true},
 	{name: "AndUint8x32", argLength: 2, commutative: true},
 	{name: "AndNotUint8x32", argLength: 2, commutative: false},
 	{name: "AverageUint8x32", argLength: 2, commutative: true},
+	{name: "AverageMaskedUint8x32", argLength: 3, commutative: true},
 	{name: "EqualUint8x32", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint8x32", argLength: 3, commutative: true},
 	{name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false},
+	{name: "GaloisFieldMulMaskedUint8x32", argLength: 3, commutative: false},
 	{name: "GreaterUint8x32", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint8x32", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint8x32", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint8x32", argLength: 3, commutative: false},
 	{name: "LessUint8x32", argLength: 2, commutative: false},
 	{name: "LessEqualUint8x32", argLength: 2, commutative: false},
-	{name: "MaskedAddUint8x32", argLength: 3, commutative: true},
-	{name: "MaskedAverageUint8x32", argLength: 3, commutative: true},
-	{name: "MaskedEqualUint8x32", argLength: 3, commutative: true},
-	{name: "MaskedGaloisFieldMulUint8x32", argLength: 3, commutative: false},
-	{name: "MaskedGreaterUint8x32", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint8x32", argLength: 3, commutative: false},
-	{name: "MaskedLessUint8x32", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint8x32", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint8x32", argLength: 3, commutative: true},
-	{name: "MaskedMinUint8x32", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint8x32", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint8x32", argLength: 2, commutative: false},
-	{name: "MaskedSaturatedAddUint8x32", argLength: 3, commutative: true},
-	{name: "MaskedSaturatedSubUint8x32", argLength: 3, commutative: false},
-	{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x32", argLength: 3, commutative: false},
-	{name: "MaskedSubUint8x32", argLength: 3, commutative: false},
+	{name: "LessEqualMaskedUint8x32", argLength: 3, commutative: false},
+	{name: "LessMaskedUint8x32", argLength: 3, commutative: false},
 	{name: "MaxUint8x32", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint8x32", argLength: 3, commutative: true},
 	{name: "MinUint8x32", argLength: 2, commutative: true},
+	{name: "MinMaskedUint8x32", argLength: 3, commutative: true},
 	{name: "NotEqualUint8x32", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint8x32", argLength: 3, commutative: true},
 	{name: "OrUint8x32", argLength: 2, commutative: true},
 	{name: "PopCountUint8x32", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint8x32", argLength: 2, commutative: false},
 	{name: "SaturatedAddUint8x32", argLength: 2, commutative: true},
+	{name: "SaturatedAddMaskedUint8x32", argLength: 3, commutative: true},
 	{name: "SaturatedSubUint8x32", argLength: 2, commutative: false},
+	{name: "SaturatedSubMaskedUint8x32", argLength: 3, commutative: false},
 	{name: "SaturatedUnsignedSignedPairDotProdUint8x32", argLength: 2, commutative: false},
+	{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32", argLength: 3, commutative: false},
 	{name: "SubUint8x32", argLength: 2, commutative: false},
+	{name: "SubMaskedUint8x32", argLength: 3, commutative: false},
 	{name: "XorUint8x32", argLength: 2, commutative: true},
 	{name: "AddUint8x64", argLength: 2, commutative: true},
+	{name: "AddMaskedUint8x64", argLength: 3, commutative: true},
 	{name: "AverageUint8x64", argLength: 2, commutative: true},
+	{name: "AverageMaskedUint8x64", argLength: 3, commutative: true},
 	{name: "EqualUint8x64", argLength: 2, commutative: true},
+	{name: "EqualMaskedUint8x64", argLength: 3, commutative: true},
 	{name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false},
+	{name: "GaloisFieldMulMaskedUint8x64", argLength: 3, commutative: false},
 	{name: "GreaterUint8x64", argLength: 2, commutative: false},
 	{name: "GreaterEqualUint8x64", argLength: 2, commutative: false},
+	{name: "GreaterEqualMaskedUint8x64", argLength: 3, commutative: false},
+	{name: "GreaterMaskedUint8x64", argLength: 3, commutative: false},
 	{name: "LessUint8x64", argLength: 2, commutative: false},
 	{name: "LessEqualUint8x64", argLength: 2, commutative: false},
-	{name: "MaskedAddUint8x64", argLength: 3, commutative: true},
-	{name: "MaskedAverageUint8x64", argLength: 3, commutative: true},
-	{name: "MaskedEqualUint8x64", argLength: 3, commutative: true},
-	{name: "MaskedGaloisFieldMulUint8x64", argLength: 3, commutative: false},
-	{name: "MaskedGreaterUint8x64", argLength: 3, commutative: false},
-	{name: "MaskedGreaterEqualUint8x64", argLength: 3, commutative: false},
-	{name: "MaskedLessUint8x64", argLength: 3, commutative: false},
-	{name: "MaskedLessEqualUint8x64", argLength: 3, commutative: false},
-	{name: "MaskedMaxUint8x64", argLength: 3, commutative: true},
-	{name: "MaskedMinUint8x64", argLength: 3, commutative: true},
-	{name: "MaskedNotEqualUint8x64", argLength: 3, commutative: true},
-	{name: "MaskedPopCountUint8x64", argLength: 2, commutative: false},
-	{name: "MaskedSaturatedAddUint8x64", argLength: 3, commutative: true},
-	{name: "MaskedSaturatedSubUint8x64", argLength: 3, commutative: false},
-	{name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x64", argLength: 3, commutative: false},
-	{name: "MaskedSubUint8x64", argLength: 3, commutative: false},
+	{name: "LessEqualMaskedUint8x64", argLength: 3, commutative: false},
+	{name: "LessMaskedUint8x64", argLength: 3, commutative: false},
 	{name: "MaxUint8x64", argLength: 2, commutative: true},
+	{name: "MaxMaskedUint8x64", argLength: 3, commutative: true},
 	{name: "MinUint8x64", argLength: 2, commutative: true},
+	{name: "MinMaskedUint8x64", argLength: 3, commutative: true},
 	{name: "NotEqualUint8x64", argLength: 2, commutative: true},
+	{name: "NotEqualMaskedUint8x64", argLength: 3, commutative: true},
 	{name: "PopCountUint8x64", argLength: 1, commutative: false},
+	{name: "PopCountMaskedUint8x64", argLength: 2, commutative: false},
 	{name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
+	{name: "SaturatedAddMaskedUint8x64", argLength: 3, commutative: true},
 	{name: "SaturatedSubUint8x64", argLength: 2, commutative: false},
+	{name: "SaturatedSubMaskedUint8x64", argLength: 3, commutative: false},
 	{name: "SaturatedUnsignedSignedPairDotProdUint8x64", argLength: 2, commutative: false},
+	{name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64", argLength: 3, commutative: false},
 	{name: "SubUint8x64", argLength: 2, commutative: false},
+	{name: "SubMaskedUint8x64", argLength: 3, commutative: false},
 	{name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "CeilWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithCeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithCeilWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithFloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithFloorWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithRoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithRoundWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithTruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithTruncWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "FloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedCeilWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithCeilWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithFloorWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithRoundWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithTruncWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedFloorWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRoundWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedTruncWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "FloorWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "RoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RoundWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "TruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "TruncWithPrecisionMaskedFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "CeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "CeilWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithCeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithCeilWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithFloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithFloorWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithRoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithRoundWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithTruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithTruncWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "FloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedCeilWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithCeilWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithFloorWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithRoundWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithTruncWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedFloorWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRoundWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedTruncWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "FloorWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "RoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RoundWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "TruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "TruncWithPrecisionMaskedFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "CeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "CeilWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithCeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithCeilWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithFloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithFloorWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithRoundWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithTruncWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "FloorWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "Get128Float32x8", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithRoundWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithTruncWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRoundWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedTruncWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "RoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RoundWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "Set128Float32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "TruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "TruncWithPrecisionMaskedFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "CeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "CeilWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithCeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithCeilWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithFloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithFloorWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithRoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithRoundWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithTruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithTruncWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "FloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedCeilWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithCeilWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithFloorWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithRoundWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithTruncWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedFloorWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRoundWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedTruncWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "FloorWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "RoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RoundWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "TruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "TruncWithPrecisionMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "CeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "CeilWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithCeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithCeilWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithFloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithFloorWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithRoundWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithTruncWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "FloorWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "Get128Float64x4", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithRoundWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithTruncWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRoundWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedTruncWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "RoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RoundWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "Set128Float64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "TruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "TruncWithPrecisionMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "CeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "CeilWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithCeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithCeilWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithFloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithFloorWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithRoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithRoundWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "DiffWithTruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "DiffWithTruncWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "FloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedCeilWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithCeilWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithFloorWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithRoundWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedDiffWithTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedFloorWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRoundWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "FloorWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RoundWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "TruncWithPrecisionMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "Get128Int16x16", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllRightAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "Set128Int16x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "ShiftAllLeftAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "ShiftAllLeftAndFillUpperFromMaskedInt16x16", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "ShiftAllRightAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllRightAndFillUpperFromInt16x32", argLength: 3, commutative: false, aux: "Int8"},
+	{name: "ShiftAllRightAndFillUpperFromMaskedInt16x16", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "ShiftAllLeftAndFillUpperFromInt16x32", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "ShiftAllLeftAndFillUpperFromMaskedInt16x32", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "ShiftAllRightAndFillUpperFromInt16x32", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "ShiftAllRightAndFillUpperFromMaskedInt16x32", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllRightAndFillUpperFromInt16x8", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "ShiftAllLeftAndFillUpperFromInt16x8", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "ShiftAllLeftAndFillUpperFromMaskedInt16x8", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "ShiftAllRightAndFillUpperFromInt16x8", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRotateAllLeftInt32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRotateAllRightInt32x16", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllRightAndFillUpperFromInt32x16", argLength: 3, commutative: false, aux: "Int8"},
+	{name: "ShiftAllRightAndFillUpperFromMaskedInt16x8", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "RotateAllLeftInt32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RotateAllLeftMaskedInt32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "RotateAllRightInt32x16", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RotateAllRightMaskedInt32x16", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "ShiftAllLeftAndFillUpperFromInt32x16", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "ShiftAllLeftAndFillUpperFromMaskedInt32x16", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "ShiftAllRightAndFillUpperFromInt32x16", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "ShiftAllRightAndFillUpperFromMaskedInt32x16", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedRotateAllLeftInt32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedRotateAllRightInt32x4", argLength: 2, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false, aux: "Int8"},
-	{name: "MaskedShiftAllRightAndFillUpperFromInt32x4", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "RotateAllLeftInt32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RotateAllLeftMaskedInt32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "RotateAllRightInt32x4", argLength: 1, commutative: false, aux: "Int8"},
+	{name: "RotateAllRightMaskedInt32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
 	{name: "ShiftAllLeftAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "ShiftAllLeftAndFillUpperFromMaskedInt32x4", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "ShiftAllRightAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"},
+	{name: "ShiftAllRightAndFillUpperFromMaskedInt32x4", argLength: 3, commutative: false, aux: "Int8"},
 	{name: "Get128Int32x8", argLength: 1, commutative: false, aux: "Int8"},
-	{name: "MaskedRotateAllLeftInt32x8", argLength: 2, commutative:
false, aux: "Int8"}, - {name: "MaskedRotateAllRightInt32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftInt32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedInt32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightInt32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedInt32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "Set128Int32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedInt32x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedInt32x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "GetElemInt64x2", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllLeftInt64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightInt64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromInt64x2", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftInt64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedInt64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightInt64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedInt64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedInt64x2", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedInt64x2", argLength: 3, commutative: false, aux: "Int8"}, {name: "Get128Int64x4", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllLeftInt64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightInt64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftInt64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedInt64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightInt64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedInt64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "Set128Int64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedInt64x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: 
"MaskedRotateAllLeftInt64x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightInt64x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromInt64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedInt64x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftInt64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedInt64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightInt64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedInt64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedInt64x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedInt64x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "Get128Int8x32", argLength: 1, commutative: false, aux: "Int8"}, {name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"}, {name: "Get128Uint16x16", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "Set128Uint16x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedUint16x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint16x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint16x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedUint16x32", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint16x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint16x32", argLength: 3, commutative: false, aux: "Int8"}, {name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint16x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedUint16x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint16x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllLeftUint32x16", 
argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightUint32x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint16x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftUint32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedUint32x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint32x16", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedUint32x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedUint32x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint32x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllLeftUint32x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightUint32x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint32x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftUint32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedUint32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint32x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedUint32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedUint32x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint32x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "Get128Uint32x8", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllLeftUint32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightUint32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftUint32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedUint32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint32x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedUint32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "Set128Uint32x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedUint32x8", argLength: 3, commutative: false, aux: "Int8"}, {name: 
"ShiftAllRightAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint32x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllLeftUint64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightUint64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint64x2", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint64x2", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftUint64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedUint64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint64x2", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedUint64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedUint64x2", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint64x2", argLength: 3, commutative: false, aux: "Int8"}, {name: "Get128Uint64x4", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllLeftUint64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightUint64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftUint64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedUint64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedUint64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "Set128Uint64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftAndFillUpperFromMaskedUint64x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllLeftUint64x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedRotateAllRightUint64x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllLeftAndFillUpperFromUint64x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedShiftAllRightAndFillUpperFromUint64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint64x4", argLength: 3, commutative: false, aux: "Int8"}, {name: "RotateAllLeftUint64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllLeftMaskedUint64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "RotateAllRightUint64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "RotateAllRightMaskedUint64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "ShiftAllLeftAndFillUpperFromUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: 
"ShiftAllLeftAndFillUpperFromMaskedUint64x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "ShiftAllRightAndFillUpperFromUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightAndFillUpperFromMaskedUint64x8", argLength: 3, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformInversedUint8x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GaloisFieldAffineTransformInversedMaskedUint8x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "GaloisFieldAffineTransformMaskedUint8x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "GetElemUint8x16", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedGaloisFieldAffineTransformUint8x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedGaloisFieldAffineTransformInversedUint8x16", argLength: 3, commutative: false, aux: "Int8"}, {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformInversedUint8x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GaloisFieldAffineTransformInversedMaskedUint8x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "GaloisFieldAffineTransformMaskedUint8x32", argLength: 3, commutative: false, aux: "Int8"}, {name: "Get128Uint8x32", argLength: 1, commutative: false, aux: "Int8"}, - {name: "MaskedGaloisFieldAffineTransformUint8x32", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedGaloisFieldAffineTransformInversedUint8x32", argLength: 3, commutative: false, aux: "Int8"}, {name: "Set128Uint8x32", argLength: 2, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformInversedUint8x64", argLength: 2, commutative: false, aux: "Int8"}, - {name: "MaskedGaloisFieldAffineTransformUint8x64", argLength: 3, commutative: false, aux: "Int8"}, - {name: "MaskedGaloisFieldAffineTransformInversedUint8x64", argLength: 3, commutative: false, aux: "Int8"}, + {name: "GaloisFieldAffineTransformInversedMaskedUint8x64", argLength: 3, commutative: false, aux: "Int8"}, + {name: "GaloisFieldAffineTransformMaskedUint8x64", argLength: 3, commutative: false, aux: "Int8"}, } } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 48428ead1f..4251c013a8 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1197,836 +1197,836 @@ const ( OpAMD64Zero256 OpAMD64Zero512 OpAMD64VADDPS512 - OpAMD64VRCP14PS512 - OpAMD64VRSQRT14PS512 - OpAMD64VDIVPS512 - OpAMD64VFMADD213PS512 - OpAMD64VFMADDSUB213PS512 - OpAMD64VFMSUBADD213PS512 OpAMD64VADDPSMasked512 + OpAMD64VRCP14PS512 OpAMD64VRCP14PSMasked512 + OpAMD64VRSQRT14PS512 OpAMD64VRSQRT14PSMasked512 + OpAMD64VDIVPS512 OpAMD64VDIVPSMasked512 + OpAMD64VFMADD213PS512 OpAMD64VFMADD213PSMasked512 + OpAMD64VFMADDSUB213PS512 OpAMD64VFMADDSUB213PSMasked512 + OpAMD64VFMSUBADD213PS512 OpAMD64VFMSUBADD213PSMasked512 - OpAMD64VMAXPSMasked512 - OpAMD64VMINPSMasked512 - OpAMD64VMULPSMasked512 - OpAMD64VSCALEFPSMasked512 - OpAMD64VSQRTPSMasked512 - OpAMD64VSUBPSMasked512 OpAMD64VMAXPS512 + OpAMD64VMAXPSMasked512 OpAMD64VMINPS512 + OpAMD64VMINPSMasked512 OpAMD64VMULPS512 OpAMD64VSCALEFPS512 + OpAMD64VSCALEFPSMasked512 + OpAMD64VMULPSMasked512 OpAMD64VSQRTPS512 + 
OpAMD64VSQRTPSMasked512 OpAMD64VSUBPS512 + OpAMD64VSUBPSMasked512 OpAMD64VADDPS128 + OpAMD64VADDPSMasked128 OpAMD64VADDSUBPS128 OpAMD64VRCP14PS128 - OpAMD64VRSQRTPS128 - OpAMD64VDIVPS128 - OpAMD64VFMADD213PS128 - OpAMD64VFMADDSUB213PS128 - OpAMD64VFMSUBADD213PS128 - OpAMD64VADDPSMasked128 OpAMD64VRCP14PSMasked128 + OpAMD64VRSQRTPS128 OpAMD64VRSQRT14PSMasked128 + OpAMD64VDIVPS128 OpAMD64VDIVPSMasked128 + OpAMD64VFMADD213PS128 OpAMD64VFMADD213PSMasked128 + OpAMD64VFMADDSUB213PS128 OpAMD64VFMADDSUB213PSMasked128 + OpAMD64VFMSUBADD213PS128 OpAMD64VFMSUBADD213PSMasked128 - OpAMD64VMAXPSMasked128 - OpAMD64VMINPSMasked128 - OpAMD64VMULPSMasked128 - OpAMD64VSCALEFPSMasked128 - OpAMD64VSQRTPSMasked128 - OpAMD64VSUBPSMasked128 OpAMD64VMAXPS128 + OpAMD64VMAXPSMasked128 OpAMD64VMINPS128 + OpAMD64VMINPSMasked128 OpAMD64VMULPS128 OpAMD64VSCALEFPS128 + OpAMD64VSCALEFPSMasked128 + OpAMD64VMULPSMasked128 OpAMD64VHADDPS128 OpAMD64VHSUBPS128 OpAMD64VSQRTPS128 + OpAMD64VSQRTPSMasked128 OpAMD64VSUBPS128 + OpAMD64VSUBPSMasked128 OpAMD64VADDPS256 + OpAMD64VADDPSMasked256 OpAMD64VADDSUBPS256 OpAMD64VRCP14PS256 - OpAMD64VRSQRTPS256 - OpAMD64VDIVPS256 - OpAMD64VFMADD213PS256 - OpAMD64VFMADDSUB213PS256 - OpAMD64VFMSUBADD213PS256 - OpAMD64VADDPSMasked256 OpAMD64VRCP14PSMasked256 + OpAMD64VRSQRTPS256 OpAMD64VRSQRT14PSMasked256 + OpAMD64VDIVPS256 OpAMD64VDIVPSMasked256 + OpAMD64VFMADD213PS256 OpAMD64VFMADD213PSMasked256 + OpAMD64VFMADDSUB213PS256 OpAMD64VFMADDSUB213PSMasked256 + OpAMD64VFMSUBADD213PS256 OpAMD64VFMSUBADD213PSMasked256 - OpAMD64VMAXPSMasked256 - OpAMD64VMINPSMasked256 - OpAMD64VMULPSMasked256 - OpAMD64VSCALEFPSMasked256 - OpAMD64VSQRTPSMasked256 - OpAMD64VSUBPSMasked256 OpAMD64VMAXPS256 + OpAMD64VMAXPSMasked256 OpAMD64VMINPS256 + OpAMD64VMINPSMasked256 OpAMD64VMULPS256 OpAMD64VSCALEFPS256 + OpAMD64VSCALEFPSMasked256 + OpAMD64VMULPSMasked256 OpAMD64VHADDPS256 OpAMD64VHSUBPS256 OpAMD64VSQRTPS256 + OpAMD64VSQRTPSMasked256 OpAMD64VSUBPS256 + OpAMD64VSUBPSMasked256 OpAMD64VADDPD128 + OpAMD64VADDPDMasked128 OpAMD64VADDSUBPD128 OpAMD64VRCP14PD128 - OpAMD64VRSQRT14PD128 - OpAMD64VDIVPD128 - OpAMD64VFMADD213PD128 - OpAMD64VFMADDSUB213PD128 - OpAMD64VFMSUBADD213PD128 - OpAMD64VADDPDMasked128 OpAMD64VRCP14PDMasked128 + OpAMD64VRSQRT14PD128 OpAMD64VRSQRT14PDMasked128 + OpAMD64VDIVPD128 OpAMD64VDIVPDMasked128 + OpAMD64VFMADD213PD128 OpAMD64VFMADD213PDMasked128 + OpAMD64VFMADDSUB213PD128 OpAMD64VFMADDSUB213PDMasked128 + OpAMD64VFMSUBADD213PD128 OpAMD64VFMSUBADD213PDMasked128 - OpAMD64VMAXPDMasked128 - OpAMD64VMINPDMasked128 - OpAMD64VMULPDMasked128 - OpAMD64VSCALEFPDMasked128 - OpAMD64VSQRTPDMasked128 - OpAMD64VSUBPDMasked128 OpAMD64VMAXPD128 + OpAMD64VMAXPDMasked128 OpAMD64VMINPD128 + OpAMD64VMINPDMasked128 OpAMD64VMULPD128 OpAMD64VSCALEFPD128 + OpAMD64VSCALEFPDMasked128 + OpAMD64VMULPDMasked128 OpAMD64VHADDPD128 OpAMD64VHSUBPD128 OpAMD64VSQRTPD128 + OpAMD64VSQRTPDMasked128 OpAMD64VSUBPD128 + OpAMD64VSUBPDMasked128 OpAMD64VADDPD256 + OpAMD64VADDPDMasked256 OpAMD64VADDSUBPD256 OpAMD64VRCP14PD256 - OpAMD64VRSQRT14PD256 - OpAMD64VDIVPD256 - OpAMD64VFMADD213PD256 - OpAMD64VFMADDSUB213PD256 - OpAMD64VFMSUBADD213PD256 - OpAMD64VADDPDMasked256 OpAMD64VRCP14PDMasked256 + OpAMD64VRSQRT14PD256 OpAMD64VRSQRT14PDMasked256 + OpAMD64VDIVPD256 OpAMD64VDIVPDMasked256 + OpAMD64VFMADD213PD256 OpAMD64VFMADD213PDMasked256 + OpAMD64VFMADDSUB213PD256 OpAMD64VFMADDSUB213PDMasked256 + OpAMD64VFMSUBADD213PD256 OpAMD64VFMSUBADD213PDMasked256 - OpAMD64VMAXPDMasked256 - OpAMD64VMINPDMasked256 - OpAMD64VMULPDMasked256 - OpAMD64VSCALEFPDMasked256 
- OpAMD64VSQRTPDMasked256 - OpAMD64VSUBPDMasked256 OpAMD64VMAXPD256 + OpAMD64VMAXPDMasked256 OpAMD64VMINPD256 + OpAMD64VMINPDMasked256 OpAMD64VMULPD256 OpAMD64VSCALEFPD256 + OpAMD64VSCALEFPDMasked256 + OpAMD64VMULPDMasked256 OpAMD64VHADDPD256 OpAMD64VHSUBPD256 OpAMD64VSQRTPD256 + OpAMD64VSQRTPDMasked256 OpAMD64VSUBPD256 + OpAMD64VSUBPDMasked256 OpAMD64VADDPD512 - OpAMD64VRCP14PD512 - OpAMD64VRSQRT14PD512 - OpAMD64VDIVPD512 - OpAMD64VFMADD213PD512 - OpAMD64VFMADDSUB213PD512 - OpAMD64VFMSUBADD213PD512 OpAMD64VADDPDMasked512 + OpAMD64VRCP14PD512 OpAMD64VRCP14PDMasked512 + OpAMD64VRSQRT14PD512 OpAMD64VRSQRT14PDMasked512 + OpAMD64VDIVPD512 OpAMD64VDIVPDMasked512 + OpAMD64VFMADD213PD512 OpAMD64VFMADD213PDMasked512 + OpAMD64VFMADDSUB213PD512 OpAMD64VFMADDSUB213PDMasked512 + OpAMD64VFMSUBADD213PD512 OpAMD64VFMSUBADD213PDMasked512 - OpAMD64VMAXPDMasked512 - OpAMD64VMINPDMasked512 - OpAMD64VMULPDMasked512 - OpAMD64VSCALEFPDMasked512 - OpAMD64VSQRTPDMasked512 - OpAMD64VSUBPDMasked512 OpAMD64VMAXPD512 + OpAMD64VMAXPDMasked512 OpAMD64VMINPD512 + OpAMD64VMINPDMasked512 OpAMD64VMULPD512 OpAMD64VSCALEFPD512 + OpAMD64VSCALEFPDMasked512 + OpAMD64VMULPDMasked512 OpAMD64VSQRTPD512 + OpAMD64VSQRTPDMasked512 OpAMD64VSUBPD512 + OpAMD64VSUBPDMasked512 OpAMD64VPABSW256 + OpAMD64VPABSWMasked256 OpAMD64VPADDW256 + OpAMD64VPADDWMasked256 OpAMD64VPCMPEQW256 OpAMD64VPCMPGTW256 - OpAMD64VPABSWMasked256 - OpAMD64VPADDWMasked256 - OpAMD64VPMAXSWMasked256 - OpAMD64VPMINSWMasked256 - OpAMD64VPMULHWMasked256 - OpAMD64VPMULLWMasked256 - OpAMD64VPMADDWDMasked256 - OpAMD64VPOPCNTWMasked256 - OpAMD64VPADDSWMasked256 - OpAMD64VPSUBSWMasked256 - OpAMD64VPSLLVWMasked256 - OpAMD64VPSHLDVWMasked256 - OpAMD64VPSRLVWMasked256 - OpAMD64VPSHRDVWMasked256 - OpAMD64VPSRAVWMasked256 - OpAMD64VPSUBWMasked256 OpAMD64VPMAXSW256 + OpAMD64VPMAXSWMasked256 OpAMD64VPMINSW256 + OpAMD64VPMINSWMasked256 OpAMD64VPMULHW256 + OpAMD64VPMULHWMasked256 OpAMD64VPMULLW256 + OpAMD64VPMULLWMasked256 OpAMD64VPMADDWD256 + OpAMD64VPMADDWDMasked256 OpAMD64VPHADDW256 OpAMD64VPHSUBW256 OpAMD64VPOPCNTW256 + OpAMD64VPOPCNTWMasked256 OpAMD64VPADDSW256 + OpAMD64VPADDSWMasked256 OpAMD64VPHADDSW256 OpAMD64VPHSUBSW256 OpAMD64VPSUBSW256 + OpAMD64VPSUBSWMasked256 OpAMD64VPSLLW256 OpAMD64VPSRLW256 OpAMD64VPSRAW256 OpAMD64VPSLLVW256 OpAMD64VPSHLDVW256 + OpAMD64VPSHLDVWMasked256 + OpAMD64VPSLLVWMasked256 OpAMD64VPSRLVW256 OpAMD64VPSHRDVW256 + OpAMD64VPSHRDVWMasked256 + OpAMD64VPSRLVWMasked256 OpAMD64VPSRAVW256 + OpAMD64VPSRAVWMasked256 OpAMD64VPSIGNW256 OpAMD64VPSUBW256 + OpAMD64VPSUBWMasked256 OpAMD64VPABSW512 - OpAMD64VPADDW512 OpAMD64VPABSWMasked512 + OpAMD64VPADDW512 OpAMD64VPADDWMasked512 - OpAMD64VPMAXSWMasked512 - OpAMD64VPMINSWMasked512 - OpAMD64VPMULHWMasked512 - OpAMD64VPMULLWMasked512 - OpAMD64VPMADDWDMasked512 - OpAMD64VPOPCNTWMasked512 - OpAMD64VPADDSWMasked512 - OpAMD64VPSUBSWMasked512 - OpAMD64VPSLLVWMasked512 - OpAMD64VPSHLDVWMasked512 - OpAMD64VPSRLVWMasked512 - OpAMD64VPSHRDVWMasked512 - OpAMD64VPSRAVWMasked512 - OpAMD64VPSUBWMasked512 OpAMD64VPMAXSW512 + OpAMD64VPMAXSWMasked512 OpAMD64VPMINSW512 + OpAMD64VPMINSWMasked512 OpAMD64VPMULHW512 + OpAMD64VPMULHWMasked512 OpAMD64VPMULLW512 + OpAMD64VPMULLWMasked512 OpAMD64VPMADDWD512 + OpAMD64VPMADDWDMasked512 OpAMD64VPOPCNTW512 + OpAMD64VPOPCNTWMasked512 OpAMD64VPADDSW512 + OpAMD64VPADDSWMasked512 OpAMD64VPSUBSW512 + OpAMD64VPSUBSWMasked512 OpAMD64VPSLLVW512 OpAMD64VPSHLDVW512 + OpAMD64VPSHLDVWMasked512 + OpAMD64VPSLLVWMasked512 OpAMD64VPSRLVW512 OpAMD64VPSHRDVW512 + OpAMD64VPSHRDVWMasked512 + 
OpAMD64VPSRLVWMasked512 OpAMD64VPSRAVW512 + OpAMD64VPSRAVWMasked512 OpAMD64VPSUBW512 + OpAMD64VPSUBWMasked512 OpAMD64VPABSW128 + OpAMD64VPABSWMasked128 OpAMD64VPADDW128 + OpAMD64VPADDWMasked128 OpAMD64VPCMPEQW128 OpAMD64VPCMPGTW128 - OpAMD64VPABSWMasked128 - OpAMD64VPADDWMasked128 - OpAMD64VPMAXSWMasked128 - OpAMD64VPMINSWMasked128 - OpAMD64VPMULHWMasked128 - OpAMD64VPMULLWMasked128 - OpAMD64VPMADDWDMasked128 - OpAMD64VPOPCNTWMasked128 - OpAMD64VPADDSWMasked128 - OpAMD64VPSUBSWMasked128 - OpAMD64VPSLLVWMasked128 - OpAMD64VPSHLDVWMasked128 - OpAMD64VPSRLVWMasked128 - OpAMD64VPSHRDVWMasked128 - OpAMD64VPSRAVWMasked128 - OpAMD64VPSUBWMasked128 OpAMD64VPMAXSW128 + OpAMD64VPMAXSWMasked128 OpAMD64VPMINSW128 + OpAMD64VPMINSWMasked128 OpAMD64VPMULHW128 + OpAMD64VPMULHWMasked128 OpAMD64VPMULLW128 + OpAMD64VPMULLWMasked128 OpAMD64VPMADDWD128 + OpAMD64VPMADDWDMasked128 OpAMD64VPHADDW128 OpAMD64VPHSUBW128 OpAMD64VPOPCNTW128 + OpAMD64VPOPCNTWMasked128 OpAMD64VPADDSW128 + OpAMD64VPADDSWMasked128 OpAMD64VPHADDSW128 OpAMD64VPHSUBSW128 OpAMD64VPSUBSW128 + OpAMD64VPSUBSWMasked128 OpAMD64VPSLLW128 OpAMD64VPSRLW128 OpAMD64VPSRAW128 OpAMD64VPSLLVW128 OpAMD64VPSHLDVW128 + OpAMD64VPSHLDVWMasked128 + OpAMD64VPSLLVWMasked128 OpAMD64VPSRLVW128 OpAMD64VPSHRDVW128 + OpAMD64VPSHRDVWMasked128 + OpAMD64VPSRLVWMasked128 OpAMD64VPSRAVW128 + OpAMD64VPSRAVWMasked128 OpAMD64VPSIGNW128 OpAMD64VPSUBW128 + OpAMD64VPSUBWMasked128 OpAMD64VPABSD512 - OpAMD64VPADDD512 - OpAMD64VPANDD512 - OpAMD64VPANDND512 OpAMD64VPABSDMasked512 + OpAMD64VPADDD512 OpAMD64VPADDDMasked512 + OpAMD64VPANDD512 OpAMD64VPANDDMasked512 + OpAMD64VPANDND512 OpAMD64VPANDNDMasked512 - OpAMD64VPMAXSDMasked512 - OpAMD64VPMINSDMasked512 - OpAMD64VPMULLDMasked512 - OpAMD64VPORDMasked512 - OpAMD64VPDPWSSDMasked512 - OpAMD64VPOPCNTDMasked512 - OpAMD64VPROLVDMasked512 - OpAMD64VPRORVDMasked512 - OpAMD64VPDPWSSDSMasked512 - OpAMD64VPDPBUSDSMasked512 - OpAMD64VPSLLVDMasked512 - OpAMD64VPSHLDVDMasked512 - OpAMD64VPSRLVDMasked512 - OpAMD64VPSHRDVDMasked512 - OpAMD64VPSRAVDMasked512 - OpAMD64VPSUBDMasked512 - OpAMD64VPDPBUSDMasked512 - OpAMD64VPXORDMasked512 OpAMD64VPMAXSD512 + OpAMD64VPMAXSDMasked512 OpAMD64VPMINSD512 + OpAMD64VPMINSDMasked512 OpAMD64VPMULLD512 + OpAMD64VPMULLDMasked512 OpAMD64VPORD512 + OpAMD64VPORDMasked512 OpAMD64VPDPWSSD512 + OpAMD64VPDPWSSDMasked512 OpAMD64VPOPCNTD512 + OpAMD64VPOPCNTDMasked512 OpAMD64VPROLVD512 + OpAMD64VPROLVDMasked512 OpAMD64VPRORVD512 + OpAMD64VPRORVDMasked512 OpAMD64VPDPWSSDS512 + OpAMD64VPDPWSSDSMasked512 OpAMD64VPDPBUSDS512 + OpAMD64VPDPBUSDSMasked512 OpAMD64VPSLLVD512 OpAMD64VPSHLDVD512 + OpAMD64VPSHLDVDMasked512 + OpAMD64VPSLLVDMasked512 OpAMD64VPSRLVD512 OpAMD64VPSHRDVD512 + OpAMD64VPSHRDVDMasked512 + OpAMD64VPSRLVDMasked512 OpAMD64VPSRAVD512 + OpAMD64VPSRAVDMasked512 OpAMD64VPSUBD512 + OpAMD64VPSUBDMasked512 OpAMD64VPDPBUSD512 + OpAMD64VPDPBUSDMasked512 OpAMD64VPXORD512 + OpAMD64VPXORDMasked512 OpAMD64VPABSD128 - OpAMD64VPADDD128 - OpAMD64VPCMPEQD128 - OpAMD64VPCMPGTD128 OpAMD64VPABSDMasked128 + OpAMD64VPADDD128 OpAMD64VPADDDMasked128 OpAMD64VPANDDMasked128 OpAMD64VPANDNDMasked128 - OpAMD64VPMAXSDMasked128 - OpAMD64VPMINSDMasked128 - OpAMD64VPMULLDMasked128 - OpAMD64VPORDMasked128 - OpAMD64VPDPWSSDMasked128 - OpAMD64VPOPCNTDMasked128 - OpAMD64VPROLVDMasked128 - OpAMD64VPRORVDMasked128 - OpAMD64VPDPWSSDSMasked128 - OpAMD64VPDPBUSDSMasked128 - OpAMD64VPSLLVDMasked128 - OpAMD64VPSHLDVDMasked128 - OpAMD64VPSRLVDMasked128 - OpAMD64VPSHRDVDMasked128 - OpAMD64VPSRAVDMasked128 - OpAMD64VPSUBDMasked128 - OpAMD64VPDPBUSDMasked128 - 
OpAMD64VPXORDMasked128 + OpAMD64VPCMPEQD128 + OpAMD64VPCMPGTD128 OpAMD64VPMAXSD128 + OpAMD64VPMAXSDMasked128 OpAMD64VPMINSD128 + OpAMD64VPMINSDMasked128 OpAMD64VPMULDQ128 OpAMD64VPMULLD128 + OpAMD64VPMULLDMasked128 + OpAMD64VPORDMasked128 OpAMD64VPDPWSSD128 + OpAMD64VPDPWSSDMasked128 OpAMD64VPHADDD128 OpAMD64VPHSUBD128 OpAMD64VPOPCNTD128 + OpAMD64VPOPCNTDMasked128 OpAMD64VPROLVD128 + OpAMD64VPROLVDMasked128 OpAMD64VPRORVD128 + OpAMD64VPRORVDMasked128 OpAMD64VPDPWSSDS128 + OpAMD64VPDPWSSDSMasked128 OpAMD64VPDPBUSDS128 + OpAMD64VPDPBUSDSMasked128 OpAMD64VPSLLD128 OpAMD64VPSRLD128 OpAMD64VPSRAD128 OpAMD64VPSLLVD128 OpAMD64VPSHLDVD128 + OpAMD64VPSHLDVDMasked128 + OpAMD64VPSLLVDMasked128 OpAMD64VPSRLVD128 OpAMD64VPSHRDVD128 + OpAMD64VPSHRDVDMasked128 + OpAMD64VPSRLVDMasked128 OpAMD64VPSRAVD128 + OpAMD64VPSRAVDMasked128 OpAMD64VPSIGND128 OpAMD64VPSUBD128 + OpAMD64VPSUBDMasked128 OpAMD64VPDPBUSD128 + OpAMD64VPDPBUSDMasked128 + OpAMD64VPXORDMasked128 OpAMD64VPABSD256 - OpAMD64VPADDD256 - OpAMD64VPCMPEQD256 - OpAMD64VPCMPGTD256 OpAMD64VPABSDMasked256 + OpAMD64VPADDD256 OpAMD64VPADDDMasked256 OpAMD64VPANDDMasked256 OpAMD64VPANDNDMasked256 - OpAMD64VPMAXSDMasked256 - OpAMD64VPMINSDMasked256 - OpAMD64VPMULLDMasked256 - OpAMD64VPORDMasked256 - OpAMD64VPDPWSSDMasked256 - OpAMD64VPOPCNTDMasked256 - OpAMD64VPROLVDMasked256 - OpAMD64VPRORVDMasked256 - OpAMD64VPDPWSSDSMasked256 - OpAMD64VPDPBUSDSMasked256 - OpAMD64VPSLLVDMasked256 - OpAMD64VPSHLDVDMasked256 - OpAMD64VPSRLVDMasked256 - OpAMD64VPSHRDVDMasked256 - OpAMD64VPSRAVDMasked256 - OpAMD64VPSUBDMasked256 - OpAMD64VPDPBUSDMasked256 - OpAMD64VPXORDMasked256 + OpAMD64VPCMPEQD256 + OpAMD64VPCMPGTD256 OpAMD64VPMAXSD256 + OpAMD64VPMAXSDMasked256 OpAMD64VPMINSD256 + OpAMD64VPMINSDMasked256 OpAMD64VPMULDQ256 OpAMD64VPMULLD256 + OpAMD64VPMULLDMasked256 + OpAMD64VPORDMasked256 OpAMD64VPDPWSSD256 + OpAMD64VPDPWSSDMasked256 OpAMD64VPHADDD256 OpAMD64VPHSUBD256 OpAMD64VPOPCNTD256 + OpAMD64VPOPCNTDMasked256 OpAMD64VPROLVD256 + OpAMD64VPROLVDMasked256 OpAMD64VPRORVD256 + OpAMD64VPRORVDMasked256 OpAMD64VPDPWSSDS256 + OpAMD64VPDPWSSDSMasked256 OpAMD64VPDPBUSDS256 + OpAMD64VPDPBUSDSMasked256 OpAMD64VPSLLD256 OpAMD64VPSRLD256 OpAMD64VPSRAD256 OpAMD64VPSLLVD256 OpAMD64VPSHLDVD256 + OpAMD64VPSHLDVDMasked256 + OpAMD64VPSLLVDMasked256 OpAMD64VPSRLVD256 OpAMD64VPSHRDVD256 + OpAMD64VPSHRDVDMasked256 + OpAMD64VPSRLVDMasked256 OpAMD64VPSRAVD256 + OpAMD64VPSRAVDMasked256 OpAMD64VPSIGND256 OpAMD64VPSUBD256 + OpAMD64VPSUBDMasked256 OpAMD64VPDPBUSD256 + OpAMD64VPDPBUSDMasked256 + OpAMD64VPXORDMasked256 OpAMD64VPABSQ128 - OpAMD64VPADDQ128 - OpAMD64VPCMPEQQ128 OpAMD64VPABSQMasked128 + OpAMD64VPADDQ128 OpAMD64VPADDQMasked128 OpAMD64VPANDQMasked128 OpAMD64VPANDNQMasked128 + OpAMD64VPCMPEQQ128 + OpAMD64VPMAXSQ128 OpAMD64VPMAXSQMasked128 + OpAMD64VPMINSQ128 OpAMD64VPMINSQMasked128 OpAMD64VPMULDQMasked128 + OpAMD64VPMULLQ128 OpAMD64VPMULLQMasked128 OpAMD64VPORQMasked128 - OpAMD64VPOPCNTQMasked128 - OpAMD64VPROLVQMasked128 - OpAMD64VPRORVQMasked128 - OpAMD64VPSLLQMasked128 - OpAMD64VPSRLQMasked128 - OpAMD64VPSRAQMasked128 - OpAMD64VPSLLVQMasked128 - OpAMD64VPSHLDVQMasked128 - OpAMD64VPSRLVQMasked128 - OpAMD64VPSHRDVQMasked128 - OpAMD64VPSRAVQMasked128 - OpAMD64VPSUBQMasked128 - OpAMD64VPXORQMasked128 - OpAMD64VPMAXSQ128 - OpAMD64VPMINSQ128 - OpAMD64VPMULLQ128 OpAMD64VPOPCNTQ128 + OpAMD64VPOPCNTQMasked128 OpAMD64VPROLVQ128 + OpAMD64VPROLVQMasked128 OpAMD64VPRORVQ128 + OpAMD64VPRORVQMasked128 OpAMD64VPSLLQ128 + OpAMD64VPSLLQMasked128 OpAMD64VPSRLQ128 + OpAMD64VPSRLQMasked128 OpAMD64VPSRAQ128 + 
OpAMD64VPSRAQMasked128 OpAMD64VPSLLVQ128 OpAMD64VPSHLDVQ128 + OpAMD64VPSHLDVQMasked128 + OpAMD64VPSLLVQMasked128 OpAMD64VPSRLVQ128 OpAMD64VPSHRDVQ128 + OpAMD64VPSHRDVQMasked128 + OpAMD64VPSRLVQMasked128 OpAMD64VPSRAVQ128 + OpAMD64VPSRAVQMasked128 OpAMD64VPSUBQ128 + OpAMD64VPSUBQMasked128 + OpAMD64VPXORQMasked128 OpAMD64VPABSQ256 - OpAMD64VPADDQ256 - OpAMD64VPCMPEQQ256 - OpAMD64VPCMPGTQ256 OpAMD64VPABSQMasked256 + OpAMD64VPADDQ256 OpAMD64VPADDQMasked256 OpAMD64VPANDQMasked256 OpAMD64VPANDNQMasked256 + OpAMD64VPCMPEQQ256 + OpAMD64VPCMPGTQ256 + OpAMD64VPMAXSQ256 OpAMD64VPMAXSQMasked256 + OpAMD64VPMINSQ256 OpAMD64VPMINSQMasked256 OpAMD64VPMULDQMasked256 + OpAMD64VPMULLQ256 OpAMD64VPMULLQMasked256 OpAMD64VPORQMasked256 - OpAMD64VPOPCNTQMasked256 - OpAMD64VPROLVQMasked256 - OpAMD64VPRORVQMasked256 - OpAMD64VPSLLQMasked256 - OpAMD64VPSRLQMasked256 - OpAMD64VPSRAQMasked256 - OpAMD64VPSLLVQMasked256 - OpAMD64VPSHLDVQMasked256 - OpAMD64VPSRLVQMasked256 - OpAMD64VPSHRDVQMasked256 - OpAMD64VPSRAVQMasked256 - OpAMD64VPSUBQMasked256 - OpAMD64VPXORQMasked256 - OpAMD64VPMAXSQ256 - OpAMD64VPMINSQ256 - OpAMD64VPMULLQ256 OpAMD64VPOPCNTQ256 + OpAMD64VPOPCNTQMasked256 OpAMD64VPROLVQ256 + OpAMD64VPROLVQMasked256 OpAMD64VPRORVQ256 + OpAMD64VPRORVQMasked256 OpAMD64VPSLLQ256 + OpAMD64VPSLLQMasked256 OpAMD64VPSRLQ256 + OpAMD64VPSRLQMasked256 OpAMD64VPSRAQ256 + OpAMD64VPSRAQMasked256 OpAMD64VPSLLVQ256 OpAMD64VPSHLDVQ256 + OpAMD64VPSHLDVQMasked256 + OpAMD64VPSLLVQMasked256 OpAMD64VPSRLVQ256 OpAMD64VPSHRDVQ256 + OpAMD64VPSHRDVQMasked256 + OpAMD64VPSRLVQMasked256 OpAMD64VPSRAVQ256 + OpAMD64VPSRAVQMasked256 OpAMD64VPSUBQ256 + OpAMD64VPSUBQMasked256 + OpAMD64VPXORQMasked256 OpAMD64VPABSQ512 - OpAMD64VPADDQ512 - OpAMD64VPANDQ512 - OpAMD64VPANDNQ512 OpAMD64VPABSQMasked512 + OpAMD64VPADDQ512 OpAMD64VPADDQMasked512 + OpAMD64VPANDQ512 OpAMD64VPANDQMasked512 + OpAMD64VPANDNQ512 OpAMD64VPANDNQMasked512 - OpAMD64VPMAXSQMasked512 - OpAMD64VPMINSQMasked512 - OpAMD64VPMULDQMasked512 - OpAMD64VPMULLQMasked512 - OpAMD64VPORQMasked512 - OpAMD64VPOPCNTQMasked512 - OpAMD64VPROLVQMasked512 - OpAMD64VPRORVQMasked512 - OpAMD64VPSLLQMasked512 - OpAMD64VPSRLQMasked512 - OpAMD64VPSRAQMasked512 - OpAMD64VPSLLVQMasked512 - OpAMD64VPSHLDVQMasked512 - OpAMD64VPSRLVQMasked512 - OpAMD64VPSHRDVQMasked512 - OpAMD64VPSRAVQMasked512 - OpAMD64VPSUBQMasked512 - OpAMD64VPXORQMasked512 OpAMD64VPMAXSQ512 + OpAMD64VPMAXSQMasked512 OpAMD64VPMINSQ512 + OpAMD64VPMINSQMasked512 OpAMD64VPMULDQ512 + OpAMD64VPMULDQMasked512 OpAMD64VPMULLQ512 + OpAMD64VPMULLQMasked512 OpAMD64VPORQ512 + OpAMD64VPORQMasked512 OpAMD64VPOPCNTQ512 + OpAMD64VPOPCNTQMasked512 OpAMD64VPROLVQ512 + OpAMD64VPROLVQMasked512 OpAMD64VPRORVQ512 + OpAMD64VPRORVQMasked512 OpAMD64VPSLLQ512 + OpAMD64VPSLLQMasked512 OpAMD64VPSRLQ512 + OpAMD64VPSRLQMasked512 OpAMD64VPSRAQ512 + OpAMD64VPSRAQMasked512 OpAMD64VPSLLVQ512 OpAMD64VPSHLDVQ512 + OpAMD64VPSHLDVQMasked512 + OpAMD64VPSLLVQMasked512 OpAMD64VPSRLVQ512 OpAMD64VPSHRDVQ512 + OpAMD64VPSHRDVQMasked512 + OpAMD64VPSRLVQMasked512 OpAMD64VPSRAVQ512 + OpAMD64VPSRAVQMasked512 OpAMD64VPSUBQ512 + OpAMD64VPSUBQMasked512 OpAMD64VPXORQ512 + OpAMD64VPXORQMasked512 OpAMD64VPABSB128 + OpAMD64VPABSBMasked128 OpAMD64VPADDB128 + OpAMD64VPADDBMasked128 OpAMD64VPAND128 OpAMD64VPANDN128 OpAMD64VPCMPEQB128 OpAMD64VPCMPGTB128 - OpAMD64VPABSBMasked128 - OpAMD64VPADDBMasked128 - OpAMD64VPMAXSBMasked128 - OpAMD64VPMINSBMasked128 - OpAMD64VPOPCNTBMasked128 - OpAMD64VPADDSBMasked128 - OpAMD64VPSUBSBMasked128 - OpAMD64VPSUBBMasked128 OpAMD64VPMAXSB128 + OpAMD64VPMAXSBMasked128 
OpAMD64VPMINSB128 + OpAMD64VPMINSBMasked128 OpAMD64VPOR128 OpAMD64VPOPCNTB128 + OpAMD64VPOPCNTBMasked128 OpAMD64VPADDSB128 + OpAMD64VPADDSBMasked128 OpAMD64VPSUBSB128 + OpAMD64VPSUBSBMasked128 OpAMD64VPSIGNB128 OpAMD64VPSUBB128 + OpAMD64VPSUBBMasked128 OpAMD64VPXOR128 OpAMD64VPABSB256 + OpAMD64VPABSBMasked256 OpAMD64VPADDB256 + OpAMD64VPADDBMasked256 OpAMD64VPAND256 OpAMD64VPANDN256 OpAMD64VPCMPEQB256 OpAMD64VPCMPGTB256 - OpAMD64VPABSBMasked256 - OpAMD64VPADDBMasked256 - OpAMD64VPMAXSBMasked256 - OpAMD64VPMINSBMasked256 - OpAMD64VPOPCNTBMasked256 - OpAMD64VPADDSBMasked256 - OpAMD64VPSUBSBMasked256 - OpAMD64VPSUBBMasked256 OpAMD64VPMAXSB256 + OpAMD64VPMAXSBMasked256 OpAMD64VPMINSB256 + OpAMD64VPMINSBMasked256 OpAMD64VPOR256 OpAMD64VPOPCNTB256 + OpAMD64VPOPCNTBMasked256 OpAMD64VPADDSB256 + OpAMD64VPADDSBMasked256 OpAMD64VPSUBSB256 + OpAMD64VPSUBSBMasked256 OpAMD64VPSIGNB256 OpAMD64VPSUBB256 + OpAMD64VPSUBBMasked256 OpAMD64VPXOR256 OpAMD64VPABSB512 - OpAMD64VPADDB512 OpAMD64VPABSBMasked512 + OpAMD64VPADDB512 OpAMD64VPADDBMasked512 - OpAMD64VPMAXSBMasked512 - OpAMD64VPMINSBMasked512 - OpAMD64VPOPCNTBMasked512 - OpAMD64VPADDSBMasked512 - OpAMD64VPSUBSBMasked512 - OpAMD64VPSUBBMasked512 OpAMD64VPMAXSB512 + OpAMD64VPMAXSBMasked512 OpAMD64VPMINSB512 + OpAMD64VPMINSBMasked512 OpAMD64VPOPCNTB512 + OpAMD64VPOPCNTBMasked512 OpAMD64VPADDSB512 + OpAMD64VPADDSBMasked512 OpAMD64VPSUBSB512 + OpAMD64VPSUBSBMasked512 OpAMD64VPSUBB512 + OpAMD64VPSUBBMasked512 OpAMD64VPAVGW256 OpAMD64VPAVGWMasked256 - OpAMD64VPMAXUWMasked256 - OpAMD64VPMINUWMasked256 - OpAMD64VPMULHUWMasked256 OpAMD64VPMAXUW256 + OpAMD64VPMAXUWMasked256 OpAMD64VPMINUW256 + OpAMD64VPMINUWMasked256 OpAMD64VPMULHUW256 + OpAMD64VPMULHUWMasked256 OpAMD64VPAVGW512 OpAMD64VPAVGWMasked512 - OpAMD64VPMAXUWMasked512 - OpAMD64VPMINUWMasked512 - OpAMD64VPMULHUWMasked512 OpAMD64VPMAXUW512 + OpAMD64VPMAXUWMasked512 OpAMD64VPMINUW512 + OpAMD64VPMINUWMasked512 OpAMD64VPMULHUW512 + OpAMD64VPMULHUWMasked512 OpAMD64VPAVGW128 OpAMD64VPAVGWMasked128 - OpAMD64VPMAXUWMasked128 - OpAMD64VPMINUWMasked128 - OpAMD64VPMULHUWMasked128 OpAMD64VPMAXUW128 + OpAMD64VPMAXUWMasked128 OpAMD64VPMINUW128 + OpAMD64VPMINUWMasked128 OpAMD64VPMULHUW128 - OpAMD64VPMAXUDMasked512 - OpAMD64VPMINUDMasked512 + OpAMD64VPMULHUWMasked128 OpAMD64VPMAXUD512 + OpAMD64VPMAXUDMasked512 OpAMD64VPMINUD512 - OpAMD64VPMAXUDMasked128 - OpAMD64VPMINUDMasked128 + OpAMD64VPMINUDMasked512 OpAMD64VPMAXUD128 + OpAMD64VPMAXUDMasked128 OpAMD64VPMINUD128 + OpAMD64VPMINUDMasked128 OpAMD64VPMULUDQ128 - OpAMD64VPMAXUDMasked256 - OpAMD64VPMINUDMasked256 OpAMD64VPMAXUD256 + OpAMD64VPMAXUDMasked256 OpAMD64VPMINUD256 + OpAMD64VPMINUDMasked256 OpAMD64VPMULUDQ256 + OpAMD64VPMAXUQ128 OpAMD64VPMAXUQMasked128 + OpAMD64VPMINUQ128 OpAMD64VPMINUQMasked128 OpAMD64VPMULUDQMasked128 - OpAMD64VPMAXUQ128 - OpAMD64VPMINUQ128 + OpAMD64VPMAXUQ256 OpAMD64VPMAXUQMasked256 + OpAMD64VPMINUQ256 OpAMD64VPMINUQMasked256 OpAMD64VPMULUDQMasked256 - OpAMD64VPMAXUQ256 - OpAMD64VPMINUQ256 - OpAMD64VPMAXUQMasked512 - OpAMD64VPMINUQMasked512 - OpAMD64VPMULUDQMasked512 OpAMD64VPMAXUQ512 + OpAMD64VPMAXUQMasked512 OpAMD64VPMINUQ512 + OpAMD64VPMINUQMasked512 OpAMD64VPMULUDQ512 + OpAMD64VPMULUDQMasked512 OpAMD64VPAVGB128 - OpAMD64VGF2P8MULB128 OpAMD64VPAVGBMasked128 + OpAMD64VGF2P8MULB128 OpAMD64VGF2P8MULBMasked128 - OpAMD64VPMAXUBMasked128 - OpAMD64VPMINUBMasked128 - OpAMD64VPMADDUBSWMasked128 OpAMD64VPMAXUB128 + OpAMD64VPMAXUBMasked128 OpAMD64VPMINUB128 + OpAMD64VPMINUBMasked128 OpAMD64VPMADDUBSW128 + OpAMD64VPMADDUBSWMasked128 OpAMD64VPAVGB256 - 
OpAMD64VGF2P8MULB256 OpAMD64VPAVGBMasked256 + OpAMD64VGF2P8MULB256 OpAMD64VGF2P8MULBMasked256 - OpAMD64VPMAXUBMasked256 - OpAMD64VPMINUBMasked256 - OpAMD64VPMADDUBSWMasked256 OpAMD64VPMAXUB256 + OpAMD64VPMAXUBMasked256 OpAMD64VPMINUB256 + OpAMD64VPMINUBMasked256 OpAMD64VPMADDUBSW256 + OpAMD64VPMADDUBSWMasked256 OpAMD64VPAVGB512 - OpAMD64VGF2P8MULB512 OpAMD64VPAVGBMasked512 + OpAMD64VGF2P8MULB512 OpAMD64VGF2P8MULBMasked512 - OpAMD64VPMAXUBMasked512 - OpAMD64VPMINUBMasked512 - OpAMD64VPMADDUBSWMasked512 OpAMD64VPMAXUB512 + OpAMD64VPMAXUBMasked512 OpAMD64VPMINUB512 + OpAMD64VPMINUBMasked512 OpAMD64VPMADDUBSW512 + OpAMD64VPMADDUBSWMasked512 OpAMD64VRNDSCALEPS512 - OpAMD64VREDUCEPS512 - OpAMD64VCMPPS512 OpAMD64VRNDSCALEPSMasked512 + OpAMD64VREDUCEPS512 OpAMD64VREDUCEPSMasked512 + OpAMD64VCMPPS512 OpAMD64VCMPPSMasked512 OpAMD64VROUNDPS128 OpAMD64VRNDSCALEPS128 - OpAMD64VREDUCEPS128 - OpAMD64VCMPPS128 OpAMD64VRNDSCALEPSMasked128 + OpAMD64VREDUCEPS128 OpAMD64VREDUCEPSMasked128 + OpAMD64VCMPPS128 OpAMD64VCMPPSMasked128 OpAMD64VROUNDPS256 OpAMD64VRNDSCALEPS256 - OpAMD64VREDUCEPS256 - OpAMD64VCMPPS256 - OpAMD64VEXTRACTF128128 OpAMD64VRNDSCALEPSMasked256 + OpAMD64VREDUCEPS256 OpAMD64VREDUCEPSMasked256 + OpAMD64VCMPPS256 OpAMD64VCMPPSMasked256 + OpAMD64VEXTRACTF128128 OpAMD64VINSERTF128256 OpAMD64VROUNDPD128 OpAMD64VRNDSCALEPD128 + OpAMD64VRNDSCALEPDMasked128 OpAMD64VREDUCEPD128 + OpAMD64VREDUCEPDMasked128 OpAMD64VDPPD128 OpAMD64VCMPPD128 - OpAMD64VRNDSCALEPDMasked128 - OpAMD64VREDUCEPDMasked128 OpAMD64VCMPPDMasked128 OpAMD64VROUNDPD256 OpAMD64VRNDSCALEPD256 - OpAMD64VREDUCEPD256 - OpAMD64VCMPPD256 OpAMD64VRNDSCALEPDMasked256 + OpAMD64VREDUCEPD256 OpAMD64VREDUCEPDMasked256 + OpAMD64VCMPPD256 OpAMD64VCMPPDMasked256 OpAMD64VRNDSCALEPD512 - OpAMD64VREDUCEPD512 - OpAMD64VCMPPD512 OpAMD64VRNDSCALEPDMasked512 + OpAMD64VREDUCEPD512 OpAMD64VREDUCEPDMasked512 + OpAMD64VCMPPD512 OpAMD64VCMPPDMasked512 - OpAMD64VPCMPW256 OpAMD64VPCMPWMasked256 - OpAMD64VPSHLDWMasked256 - OpAMD64VPSHRDWMasked256 + OpAMD64VPCMPW256 OpAMD64VPSHLDW256 + OpAMD64VPSHLDWMasked256 OpAMD64VPSHRDW256 + OpAMD64VPSHRDWMasked256 OpAMD64VPCMPW512 OpAMD64VPCMPWMasked512 - OpAMD64VPSHLDWMasked512 - OpAMD64VPSHRDWMasked512 OpAMD64VPSHLDW512 + OpAMD64VPSHLDWMasked512 OpAMD64VPSHRDW512 + OpAMD64VPSHRDWMasked512 + OpAMD64VPCMPWMasked128 OpAMD64VPEXTRW128 OpAMD64VPCMPW128 - OpAMD64VPCMPWMasked128 - OpAMD64VPSHLDWMasked128 - OpAMD64VPSHRDWMasked128 OpAMD64VPINSRW128 OpAMD64VPSHLDW128 + OpAMD64VPSHLDWMasked128 OpAMD64VPSHRDW128 + OpAMD64VPSHRDWMasked128 OpAMD64VPCMPD512 OpAMD64VPCMPDMasked512 - OpAMD64VPROLDMasked512 - OpAMD64VPRORDMasked512 - OpAMD64VPSHLDDMasked512 - OpAMD64VPSHRDDMasked512 OpAMD64VPROLD512 + OpAMD64VPROLDMasked512 OpAMD64VPRORD512 + OpAMD64VPRORDMasked512 OpAMD64VPSHLDD512 + OpAMD64VPSHLDDMasked512 OpAMD64VPSHRDD512 + OpAMD64VPSHRDDMasked512 + OpAMD64VPCMPDMasked128 OpAMD64VPEXTRD128 OpAMD64VPCMPD128 - OpAMD64VPCMPDMasked128 - OpAMD64VPROLDMasked128 - OpAMD64VPRORDMasked128 - OpAMD64VPSHLDDMasked128 - OpAMD64VPSHRDDMasked128 OpAMD64VPROLD128 + OpAMD64VPROLDMasked128 OpAMD64VPRORD128 + OpAMD64VPRORDMasked128 OpAMD64VPINSRD128 OpAMD64VPSHLDD128 + OpAMD64VPSHLDDMasked128 OpAMD64VPSHRDD128 - OpAMD64VPCMPD256 + OpAMD64VPSHRDDMasked128 OpAMD64VPCMPDMasked256 - OpAMD64VPROLDMasked256 - OpAMD64VPRORDMasked256 - OpAMD64VPSHLDDMasked256 - OpAMD64VPSHRDDMasked256 + OpAMD64VPCMPD256 OpAMD64VPROLD256 + OpAMD64VPROLDMasked256 OpAMD64VPRORD256 + OpAMD64VPRORDMasked256 OpAMD64VPSHLDD256 + OpAMD64VPSHLDDMasked256 OpAMD64VPSHRDD256 + 
OpAMD64VPSHRDDMasked256 + OpAMD64VPCMPQMasked128 OpAMD64VPEXTRQ128 OpAMD64VPCMPQ128 - OpAMD64VPCMPQMasked128 - OpAMD64VPROLQMasked128 - OpAMD64VPRORQMasked128 - OpAMD64VPSHLDQMasked128 - OpAMD64VPSHRDQMasked128 OpAMD64VPROLQ128 + OpAMD64VPROLQMasked128 OpAMD64VPRORQ128 + OpAMD64VPRORQMasked128 OpAMD64VPINSRQ128 OpAMD64VPSHLDQ128 + OpAMD64VPSHLDQMasked128 OpAMD64VPSHRDQ128 - OpAMD64VPCMPQ256 + OpAMD64VPSHRDQMasked128 OpAMD64VPCMPQMasked256 - OpAMD64VPROLQMasked256 - OpAMD64VPRORQMasked256 - OpAMD64VPSHLDQMasked256 - OpAMD64VPSHRDQMasked256 + OpAMD64VPCMPQ256 OpAMD64VPROLQ256 + OpAMD64VPROLQMasked256 OpAMD64VPRORQ256 + OpAMD64VPRORQMasked256 OpAMD64VPSHLDQ256 + OpAMD64VPSHLDQMasked256 OpAMD64VPSHRDQ256 + OpAMD64VPSHRDQMasked256 OpAMD64VPCMPQ512 OpAMD64VPCMPQMasked512 - OpAMD64VPROLQMasked512 - OpAMD64VPRORQMasked512 - OpAMD64VPSHLDQMasked512 - OpAMD64VPSHRDQMasked512 OpAMD64VPROLQ512 + OpAMD64VPROLQMasked512 OpAMD64VPRORQ512 + OpAMD64VPRORQMasked512 OpAMD64VPSHLDQ512 + OpAMD64VPSHLDQMasked512 OpAMD64VPSHRDQ512 + OpAMD64VPSHRDQMasked512 + OpAMD64VPCMPBMasked128 OpAMD64VPEXTRB128 OpAMD64VPCMPB128 - OpAMD64VPCMPBMasked128 OpAMD64VPINSRB128 + OpAMD64VPCMPBMasked256 OpAMD64VEXTRACTI128128 OpAMD64VPCMPB256 - OpAMD64VPCMPBMasked256 OpAMD64VINSERTI128256 OpAMD64VPCMPB512 OpAMD64VPCMPBMasked512 @@ -2049,23 +2049,23 @@ const ( OpAMD64VPCMPUQ512 OpAMD64VPCMPUQMasked512 OpAMD64VPCMPUB128 + OpAMD64VPCMPUBMasked128 OpAMD64VGF2P8AFFINEQB128 OpAMD64VGF2P8AFFINEINVQB128 - OpAMD64VPCMPUBMasked128 - OpAMD64VGF2P8AFFINEQBMasked128 OpAMD64VGF2P8AFFINEINVQBMasked128 + OpAMD64VGF2P8AFFINEQBMasked128 OpAMD64VPCMPUB256 + OpAMD64VPCMPUBMasked256 OpAMD64VGF2P8AFFINEQB256 OpAMD64VGF2P8AFFINEINVQB256 - OpAMD64VPCMPUBMasked256 - OpAMD64VGF2P8AFFINEQBMasked256 OpAMD64VGF2P8AFFINEINVQBMasked256 + OpAMD64VGF2P8AFFINEQBMasked256 OpAMD64VPCMPUB512 + OpAMD64VPCMPUBMasked512 OpAMD64VGF2P8AFFINEQB512 OpAMD64VGF2P8AFFINEINVQB512 - OpAMD64VPCMPUBMasked512 - OpAMD64VGF2P8AFFINEQBMasked512 OpAMD64VGF2P8AFFINEINVQBMasked512 + OpAMD64VGF2P8AFFINEQBMasked512 OpARMADD OpARMADDconst @@ -4293,1682 +4293,1682 @@ const ( OpAdd32x4 OpZeroSIMD OpAddFloat32x16 + OpAddMaskedFloat32x16 OpApproximateReciprocalFloat32x16 + OpApproximateReciprocalMaskedFloat32x16 OpApproximateReciprocalOfSqrtFloat32x16 + OpApproximateReciprocalOfSqrtMaskedFloat32x16 OpDivFloat32x16 + OpDivMaskedFloat32x16 OpEqualFloat32x16 + OpEqualMaskedFloat32x16 OpFusedMultiplyAddFloat32x16 + OpFusedMultiplyAddMaskedFloat32x16 OpFusedMultiplyAddSubFloat32x16 + OpFusedMultiplyAddSubMaskedFloat32x16 OpFusedMultiplySubAddFloat32x16 + OpFusedMultiplySubAddMaskedFloat32x16 OpGreaterFloat32x16 OpGreaterEqualFloat32x16 + OpGreaterEqualMaskedFloat32x16 + OpGreaterMaskedFloat32x16 OpIsNanFloat32x16 + OpIsNanMaskedFloat32x16 OpLessFloat32x16 OpLessEqualFloat32x16 - OpMaskedAddFloat32x16 - OpMaskedApproximateReciprocalFloat32x16 - OpMaskedApproximateReciprocalOfSqrtFloat32x16 - OpMaskedDivFloat32x16 - OpMaskedEqualFloat32x16 - OpMaskedFusedMultiplyAddFloat32x16 - OpMaskedFusedMultiplyAddSubFloat32x16 - OpMaskedFusedMultiplySubAddFloat32x16 - OpMaskedGreaterFloat32x16 - OpMaskedGreaterEqualFloat32x16 - OpMaskedIsNanFloat32x16 - OpMaskedLessFloat32x16 - OpMaskedLessEqualFloat32x16 - OpMaskedMaxFloat32x16 - OpMaskedMinFloat32x16 - OpMaskedMulFloat32x16 - OpMaskedMulByPowOf2Float32x16 - OpMaskedNotEqualFloat32x16 - OpMaskedSqrtFloat32x16 - OpMaskedSubFloat32x16 + OpLessEqualMaskedFloat32x16 + OpLessMaskedFloat32x16 OpMaxFloat32x16 + OpMaxMaskedFloat32x16 OpMinFloat32x16 + OpMinMaskedFloat32x16 
OpMulFloat32x16 OpMulByPowOf2Float32x16 + OpMulByPowOf2MaskedFloat32x16 + OpMulMaskedFloat32x16 OpNotEqualFloat32x16 + OpNotEqualMaskedFloat32x16 OpSqrtFloat32x16 + OpSqrtMaskedFloat32x16 OpSubFloat32x16 + OpSubMaskedFloat32x16 OpAddFloat32x4 + OpAddMaskedFloat32x4 OpAddSubFloat32x4 OpApproximateReciprocalFloat32x4 + OpApproximateReciprocalMaskedFloat32x4 OpApproximateReciprocalOfSqrtFloat32x4 + OpApproximateReciprocalOfSqrtMaskedFloat32x4 OpCeilFloat32x4 OpDivFloat32x4 + OpDivMaskedFloat32x4 OpEqualFloat32x4 + OpEqualMaskedFloat32x4 OpFloorFloat32x4 OpFusedMultiplyAddFloat32x4 + OpFusedMultiplyAddMaskedFloat32x4 OpFusedMultiplyAddSubFloat32x4 + OpFusedMultiplyAddSubMaskedFloat32x4 OpFusedMultiplySubAddFloat32x4 + OpFusedMultiplySubAddMaskedFloat32x4 OpGreaterFloat32x4 OpGreaterEqualFloat32x4 + OpGreaterEqualMaskedFloat32x4 + OpGreaterMaskedFloat32x4 OpIsNanFloat32x4 + OpIsNanMaskedFloat32x4 OpLessFloat32x4 OpLessEqualFloat32x4 - OpMaskedAddFloat32x4 - OpMaskedApproximateReciprocalFloat32x4 - OpMaskedApproximateReciprocalOfSqrtFloat32x4 - OpMaskedDivFloat32x4 - OpMaskedEqualFloat32x4 - OpMaskedFusedMultiplyAddFloat32x4 - OpMaskedFusedMultiplyAddSubFloat32x4 - OpMaskedFusedMultiplySubAddFloat32x4 - OpMaskedGreaterFloat32x4 - OpMaskedGreaterEqualFloat32x4 - OpMaskedIsNanFloat32x4 - OpMaskedLessFloat32x4 - OpMaskedLessEqualFloat32x4 - OpMaskedMaxFloat32x4 - OpMaskedMinFloat32x4 - OpMaskedMulFloat32x4 - OpMaskedMulByPowOf2Float32x4 - OpMaskedNotEqualFloat32x4 - OpMaskedSqrtFloat32x4 - OpMaskedSubFloat32x4 + OpLessEqualMaskedFloat32x4 + OpLessMaskedFloat32x4 OpMaxFloat32x4 + OpMaxMaskedFloat32x4 OpMinFloat32x4 + OpMinMaskedFloat32x4 OpMulFloat32x4 OpMulByPowOf2Float32x4 + OpMulByPowOf2MaskedFloat32x4 + OpMulMaskedFloat32x4 OpNotEqualFloat32x4 + OpNotEqualMaskedFloat32x4 OpPairwiseAddFloat32x4 OpPairwiseSubFloat32x4 OpRoundFloat32x4 OpSqrtFloat32x4 + OpSqrtMaskedFloat32x4 OpSubFloat32x4 + OpSubMaskedFloat32x4 OpTruncFloat32x4 OpAddFloat32x8 + OpAddMaskedFloat32x8 OpAddSubFloat32x8 OpApproximateReciprocalFloat32x8 + OpApproximateReciprocalMaskedFloat32x8 OpApproximateReciprocalOfSqrtFloat32x8 + OpApproximateReciprocalOfSqrtMaskedFloat32x8 OpCeilFloat32x8 OpDivFloat32x8 + OpDivMaskedFloat32x8 OpEqualFloat32x8 + OpEqualMaskedFloat32x8 OpFloorFloat32x8 OpFusedMultiplyAddFloat32x8 + OpFusedMultiplyAddMaskedFloat32x8 OpFusedMultiplyAddSubFloat32x8 + OpFusedMultiplyAddSubMaskedFloat32x8 OpFusedMultiplySubAddFloat32x8 + OpFusedMultiplySubAddMaskedFloat32x8 OpGreaterFloat32x8 OpGreaterEqualFloat32x8 + OpGreaterEqualMaskedFloat32x8 + OpGreaterMaskedFloat32x8 OpIsNanFloat32x8 + OpIsNanMaskedFloat32x8 OpLessFloat32x8 OpLessEqualFloat32x8 - OpMaskedAddFloat32x8 - OpMaskedApproximateReciprocalFloat32x8 - OpMaskedApproximateReciprocalOfSqrtFloat32x8 - OpMaskedDivFloat32x8 - OpMaskedEqualFloat32x8 - OpMaskedFusedMultiplyAddFloat32x8 - OpMaskedFusedMultiplyAddSubFloat32x8 - OpMaskedFusedMultiplySubAddFloat32x8 - OpMaskedGreaterFloat32x8 - OpMaskedGreaterEqualFloat32x8 - OpMaskedIsNanFloat32x8 - OpMaskedLessFloat32x8 - OpMaskedLessEqualFloat32x8 - OpMaskedMaxFloat32x8 - OpMaskedMinFloat32x8 - OpMaskedMulFloat32x8 - OpMaskedMulByPowOf2Float32x8 - OpMaskedNotEqualFloat32x8 - OpMaskedSqrtFloat32x8 - OpMaskedSubFloat32x8 + OpLessEqualMaskedFloat32x8 + OpLessMaskedFloat32x8 OpMaxFloat32x8 + OpMaxMaskedFloat32x8 OpMinFloat32x8 + OpMinMaskedFloat32x8 OpMulFloat32x8 OpMulByPowOf2Float32x8 + OpMulByPowOf2MaskedFloat32x8 + OpMulMaskedFloat32x8 OpNotEqualFloat32x8 + OpNotEqualMaskedFloat32x8 OpPairwiseAddFloat32x8 
OpPairwiseSubFloat32x8 OpRoundFloat32x8 OpSqrtFloat32x8 + OpSqrtMaskedFloat32x8 OpSubFloat32x8 + OpSubMaskedFloat32x8 OpTruncFloat32x8 OpAddFloat64x2 + OpAddMaskedFloat64x2 OpAddSubFloat64x2 OpApproximateReciprocalFloat64x2 + OpApproximateReciprocalMaskedFloat64x2 OpApproximateReciprocalOfSqrtFloat64x2 + OpApproximateReciprocalOfSqrtMaskedFloat64x2 OpCeilFloat64x2 OpDivFloat64x2 + OpDivMaskedFloat64x2 OpDotProdBroadcastFloat64x2 OpEqualFloat64x2 + OpEqualMaskedFloat64x2 OpFloorFloat64x2 OpFusedMultiplyAddFloat64x2 + OpFusedMultiplyAddMaskedFloat64x2 OpFusedMultiplyAddSubFloat64x2 + OpFusedMultiplyAddSubMaskedFloat64x2 OpFusedMultiplySubAddFloat64x2 + OpFusedMultiplySubAddMaskedFloat64x2 OpGreaterFloat64x2 OpGreaterEqualFloat64x2 + OpGreaterEqualMaskedFloat64x2 + OpGreaterMaskedFloat64x2 OpIsNanFloat64x2 + OpIsNanMaskedFloat64x2 OpLessFloat64x2 OpLessEqualFloat64x2 - OpMaskedAddFloat64x2 - OpMaskedApproximateReciprocalFloat64x2 - OpMaskedApproximateReciprocalOfSqrtFloat64x2 - OpMaskedDivFloat64x2 - OpMaskedEqualFloat64x2 - OpMaskedFusedMultiplyAddFloat64x2 - OpMaskedFusedMultiplyAddSubFloat64x2 - OpMaskedFusedMultiplySubAddFloat64x2 - OpMaskedGreaterFloat64x2 - OpMaskedGreaterEqualFloat64x2 - OpMaskedIsNanFloat64x2 - OpMaskedLessFloat64x2 - OpMaskedLessEqualFloat64x2 - OpMaskedMaxFloat64x2 - OpMaskedMinFloat64x2 - OpMaskedMulFloat64x2 - OpMaskedMulByPowOf2Float64x2 - OpMaskedNotEqualFloat64x2 - OpMaskedSqrtFloat64x2 - OpMaskedSubFloat64x2 + OpLessEqualMaskedFloat64x2 + OpLessMaskedFloat64x2 OpMaxFloat64x2 + OpMaxMaskedFloat64x2 OpMinFloat64x2 + OpMinMaskedFloat64x2 OpMulFloat64x2 OpMulByPowOf2Float64x2 + OpMulByPowOf2MaskedFloat64x2 + OpMulMaskedFloat64x2 OpNotEqualFloat64x2 + OpNotEqualMaskedFloat64x2 OpPairwiseAddFloat64x2 OpPairwiseSubFloat64x2 OpRoundFloat64x2 OpSqrtFloat64x2 + OpSqrtMaskedFloat64x2 OpSubFloat64x2 + OpSubMaskedFloat64x2 OpTruncFloat64x2 OpAddFloat64x4 + OpAddMaskedFloat64x4 OpAddSubFloat64x4 OpApproximateReciprocalFloat64x4 + OpApproximateReciprocalMaskedFloat64x4 OpApproximateReciprocalOfSqrtFloat64x4 + OpApproximateReciprocalOfSqrtMaskedFloat64x4 OpCeilFloat64x4 OpDivFloat64x4 + OpDivMaskedFloat64x4 OpEqualFloat64x4 + OpEqualMaskedFloat64x4 OpFloorFloat64x4 OpFusedMultiplyAddFloat64x4 + OpFusedMultiplyAddMaskedFloat64x4 OpFusedMultiplyAddSubFloat64x4 + OpFusedMultiplyAddSubMaskedFloat64x4 OpFusedMultiplySubAddFloat64x4 + OpFusedMultiplySubAddMaskedFloat64x4 OpGreaterFloat64x4 OpGreaterEqualFloat64x4 + OpGreaterEqualMaskedFloat64x4 + OpGreaterMaskedFloat64x4 OpIsNanFloat64x4 + OpIsNanMaskedFloat64x4 OpLessFloat64x4 OpLessEqualFloat64x4 - OpMaskedAddFloat64x4 - OpMaskedApproximateReciprocalFloat64x4 - OpMaskedApproximateReciprocalOfSqrtFloat64x4 - OpMaskedDivFloat64x4 - OpMaskedEqualFloat64x4 - OpMaskedFusedMultiplyAddFloat64x4 - OpMaskedFusedMultiplyAddSubFloat64x4 - OpMaskedFusedMultiplySubAddFloat64x4 - OpMaskedGreaterFloat64x4 - OpMaskedGreaterEqualFloat64x4 - OpMaskedIsNanFloat64x4 - OpMaskedLessFloat64x4 - OpMaskedLessEqualFloat64x4 - OpMaskedMaxFloat64x4 - OpMaskedMinFloat64x4 - OpMaskedMulFloat64x4 - OpMaskedMulByPowOf2Float64x4 - OpMaskedNotEqualFloat64x4 - OpMaskedSqrtFloat64x4 - OpMaskedSubFloat64x4 + OpLessEqualMaskedFloat64x4 + OpLessMaskedFloat64x4 OpMaxFloat64x4 + OpMaxMaskedFloat64x4 OpMinFloat64x4 + OpMinMaskedFloat64x4 OpMulFloat64x4 OpMulByPowOf2Float64x4 + OpMulByPowOf2MaskedFloat64x4 + OpMulMaskedFloat64x4 OpNotEqualFloat64x4 + OpNotEqualMaskedFloat64x4 OpPairwiseAddFloat64x4 OpPairwiseSubFloat64x4 OpRoundFloat64x4 OpSqrtFloat64x4 + 
OpSqrtMaskedFloat64x4 OpSubFloat64x4 + OpSubMaskedFloat64x4 OpTruncFloat64x4 OpAddFloat64x8 + OpAddMaskedFloat64x8 OpApproximateReciprocalFloat64x8 + OpApproximateReciprocalMaskedFloat64x8 OpApproximateReciprocalOfSqrtFloat64x8 + OpApproximateReciprocalOfSqrtMaskedFloat64x8 OpDivFloat64x8 + OpDivMaskedFloat64x8 OpEqualFloat64x8 + OpEqualMaskedFloat64x8 OpFusedMultiplyAddFloat64x8 + OpFusedMultiplyAddMaskedFloat64x8 OpFusedMultiplyAddSubFloat64x8 + OpFusedMultiplyAddSubMaskedFloat64x8 OpFusedMultiplySubAddFloat64x8 + OpFusedMultiplySubAddMaskedFloat64x8 OpGreaterFloat64x8 OpGreaterEqualFloat64x8 + OpGreaterEqualMaskedFloat64x8 + OpGreaterMaskedFloat64x8 OpIsNanFloat64x8 + OpIsNanMaskedFloat64x8 OpLessFloat64x8 OpLessEqualFloat64x8 - OpMaskedAddFloat64x8 - OpMaskedApproximateReciprocalFloat64x8 - OpMaskedApproximateReciprocalOfSqrtFloat64x8 - OpMaskedDivFloat64x8 - OpMaskedEqualFloat64x8 - OpMaskedFusedMultiplyAddFloat64x8 - OpMaskedFusedMultiplyAddSubFloat64x8 - OpMaskedFusedMultiplySubAddFloat64x8 - OpMaskedGreaterFloat64x8 - OpMaskedGreaterEqualFloat64x8 - OpMaskedIsNanFloat64x8 - OpMaskedLessFloat64x8 - OpMaskedLessEqualFloat64x8 - OpMaskedMaxFloat64x8 - OpMaskedMinFloat64x8 - OpMaskedMulFloat64x8 - OpMaskedMulByPowOf2Float64x8 - OpMaskedNotEqualFloat64x8 - OpMaskedSqrtFloat64x8 - OpMaskedSubFloat64x8 + OpLessEqualMaskedFloat64x8 + OpLessMaskedFloat64x8 OpMaxFloat64x8 + OpMaxMaskedFloat64x8 OpMinFloat64x8 + OpMinMaskedFloat64x8 OpMulFloat64x8 OpMulByPowOf2Float64x8 + OpMulByPowOf2MaskedFloat64x8 + OpMulMaskedFloat64x8 OpNotEqualFloat64x8 + OpNotEqualMaskedFloat64x8 OpSqrtFloat64x8 + OpSqrtMaskedFloat64x8 OpSubFloat64x8 + OpSubMaskedFloat64x8 OpAbsoluteInt16x16 + OpAbsoluteMaskedInt16x16 OpAddInt16x16 + OpAddMaskedInt16x16 OpAndInt16x16 OpAndNotInt16x16 OpEqualInt16x16 + OpEqualMaskedInt16x16 OpGreaterInt16x16 OpGreaterEqualInt16x16 + OpGreaterEqualMaskedInt16x16 + OpGreaterMaskedInt16x16 OpLessInt16x16 OpLessEqualInt16x16 - OpMaskedAbsoluteInt16x16 - OpMaskedAddInt16x16 - OpMaskedEqualInt16x16 - OpMaskedGreaterInt16x16 - OpMaskedGreaterEqualInt16x16 - OpMaskedLessInt16x16 - OpMaskedLessEqualInt16x16 - OpMaskedMaxInt16x16 - OpMaskedMinInt16x16 - OpMaskedMulHighInt16x16 - OpMaskedMulLowInt16x16 - OpMaskedNotEqualInt16x16 - OpMaskedPairDotProdInt16x16 - OpMaskedPopCountInt16x16 - OpMaskedSaturatedAddInt16x16 - OpMaskedSaturatedSubInt16x16 - OpMaskedShiftLeftInt16x16 - OpMaskedShiftLeftAndFillUpperFromInt16x16 - OpMaskedShiftRightInt16x16 - OpMaskedShiftRightAndFillUpperFromInt16x16 - OpMaskedShiftRightSignExtendedInt16x16 - OpMaskedSubInt16x16 + OpLessEqualMaskedInt16x16 + OpLessMaskedInt16x16 OpMaxInt16x16 + OpMaxMaskedInt16x16 OpMinInt16x16 + OpMinMaskedInt16x16 OpMulHighInt16x16 + OpMulHighMaskedInt16x16 OpMulLowInt16x16 + OpMulLowMaskedInt16x16 OpNotEqualInt16x16 + OpNotEqualMaskedInt16x16 OpOrInt16x16 OpPairDotProdInt16x16 + OpPairDotProdMaskedInt16x16 OpPairwiseAddInt16x16 OpPairwiseSubInt16x16 OpPopCountInt16x16 + OpPopCountMaskedInt16x16 OpSaturatedAddInt16x16 + OpSaturatedAddMaskedInt16x16 OpSaturatedPairwiseAddInt16x16 OpSaturatedPairwiseSubInt16x16 OpSaturatedSubInt16x16 + OpSaturatedSubMaskedInt16x16 OpShiftAllLeftInt16x16 OpShiftAllRightInt16x16 OpShiftAllRightSignExtendedInt16x16 OpShiftLeftInt16x16 OpShiftLeftAndFillUpperFromInt16x16 + OpShiftLeftAndFillUpperFromMaskedInt16x16 + OpShiftLeftMaskedInt16x16 OpShiftRightInt16x16 OpShiftRightAndFillUpperFromInt16x16 + OpShiftRightAndFillUpperFromMaskedInt16x16 + OpShiftRightMaskedInt16x16 OpShiftRightSignExtendedInt16x16 + 
OpShiftRightSignExtendedMaskedInt16x16 OpSignInt16x16 OpSubInt16x16 + OpSubMaskedInt16x16 OpXorInt16x16 OpAbsoluteInt16x32 + OpAbsoluteMaskedInt16x32 OpAddInt16x32 + OpAddMaskedInt16x32 OpEqualInt16x32 + OpEqualMaskedInt16x32 OpGreaterInt16x32 OpGreaterEqualInt16x32 + OpGreaterEqualMaskedInt16x32 + OpGreaterMaskedInt16x32 OpLessInt16x32 OpLessEqualInt16x32 - OpMaskedAbsoluteInt16x32 - OpMaskedAddInt16x32 - OpMaskedEqualInt16x32 - OpMaskedGreaterInt16x32 - OpMaskedGreaterEqualInt16x32 - OpMaskedLessInt16x32 - OpMaskedLessEqualInt16x32 - OpMaskedMaxInt16x32 - OpMaskedMinInt16x32 - OpMaskedMulHighInt16x32 - OpMaskedMulLowInt16x32 - OpMaskedNotEqualInt16x32 - OpMaskedPairDotProdInt16x32 - OpMaskedPopCountInt16x32 - OpMaskedSaturatedAddInt16x32 - OpMaskedSaturatedSubInt16x32 - OpMaskedShiftLeftInt16x32 - OpMaskedShiftLeftAndFillUpperFromInt16x32 - OpMaskedShiftRightInt16x32 - OpMaskedShiftRightAndFillUpperFromInt16x32 - OpMaskedShiftRightSignExtendedInt16x32 - OpMaskedSubInt16x32 + OpLessEqualMaskedInt16x32 + OpLessMaskedInt16x32 OpMaxInt16x32 + OpMaxMaskedInt16x32 OpMinInt16x32 + OpMinMaskedInt16x32 OpMulHighInt16x32 + OpMulHighMaskedInt16x32 OpMulLowInt16x32 + OpMulLowMaskedInt16x32 OpNotEqualInt16x32 + OpNotEqualMaskedInt16x32 OpPairDotProdInt16x32 + OpPairDotProdMaskedInt16x32 OpPopCountInt16x32 + OpPopCountMaskedInt16x32 OpSaturatedAddInt16x32 + OpSaturatedAddMaskedInt16x32 OpSaturatedSubInt16x32 + OpSaturatedSubMaskedInt16x32 OpShiftLeftInt16x32 OpShiftLeftAndFillUpperFromInt16x32 + OpShiftLeftAndFillUpperFromMaskedInt16x32 + OpShiftLeftMaskedInt16x32 OpShiftRightInt16x32 OpShiftRightAndFillUpperFromInt16x32 + OpShiftRightAndFillUpperFromMaskedInt16x32 + OpShiftRightMaskedInt16x32 OpShiftRightSignExtendedInt16x32 + OpShiftRightSignExtendedMaskedInt16x32 OpSubInt16x32 + OpSubMaskedInt16x32 OpAbsoluteInt16x8 + OpAbsoluteMaskedInt16x8 OpAddInt16x8 + OpAddMaskedInt16x8 OpAndInt16x8 OpAndNotInt16x8 OpEqualInt16x8 + OpEqualMaskedInt16x8 OpGreaterInt16x8 OpGreaterEqualInt16x8 + OpGreaterEqualMaskedInt16x8 + OpGreaterMaskedInt16x8 OpLessInt16x8 OpLessEqualInt16x8 - OpMaskedAbsoluteInt16x8 - OpMaskedAddInt16x8 - OpMaskedEqualInt16x8 - OpMaskedGreaterInt16x8 - OpMaskedGreaterEqualInt16x8 - OpMaskedLessInt16x8 - OpMaskedLessEqualInt16x8 - OpMaskedMaxInt16x8 - OpMaskedMinInt16x8 - OpMaskedMulHighInt16x8 - OpMaskedMulLowInt16x8 - OpMaskedNotEqualInt16x8 - OpMaskedPairDotProdInt16x8 - OpMaskedPopCountInt16x8 - OpMaskedSaturatedAddInt16x8 - OpMaskedSaturatedSubInt16x8 - OpMaskedShiftLeftInt16x8 - OpMaskedShiftLeftAndFillUpperFromInt16x8 - OpMaskedShiftRightInt16x8 - OpMaskedShiftRightAndFillUpperFromInt16x8 - OpMaskedShiftRightSignExtendedInt16x8 - OpMaskedSubInt16x8 + OpLessEqualMaskedInt16x8 + OpLessMaskedInt16x8 OpMaxInt16x8 + OpMaxMaskedInt16x8 OpMinInt16x8 + OpMinMaskedInt16x8 OpMulHighInt16x8 + OpMulHighMaskedInt16x8 OpMulLowInt16x8 + OpMulLowMaskedInt16x8 OpNotEqualInt16x8 + OpNotEqualMaskedInt16x8 OpOrInt16x8 OpPairDotProdInt16x8 + OpPairDotProdMaskedInt16x8 OpPairwiseAddInt16x8 OpPairwiseSubInt16x8 OpPopCountInt16x8 + OpPopCountMaskedInt16x8 OpSaturatedAddInt16x8 + OpSaturatedAddMaskedInt16x8 OpSaturatedPairwiseAddInt16x8 OpSaturatedPairwiseSubInt16x8 OpSaturatedSubInt16x8 + OpSaturatedSubMaskedInt16x8 OpShiftAllLeftInt16x8 OpShiftAllRightInt16x8 OpShiftAllRightSignExtendedInt16x8 OpShiftLeftInt16x8 OpShiftLeftAndFillUpperFromInt16x8 + OpShiftLeftAndFillUpperFromMaskedInt16x8 + OpShiftLeftMaskedInt16x8 OpShiftRightInt16x8 OpShiftRightAndFillUpperFromInt16x8 + 
OpShiftRightAndFillUpperFromMaskedInt16x8 + OpShiftRightMaskedInt16x8 OpShiftRightSignExtendedInt16x8 + OpShiftRightSignExtendedMaskedInt16x8 OpSignInt16x8 OpSubInt16x8 + OpSubMaskedInt16x8 OpXorInt16x8 OpAbsoluteInt32x16 + OpAbsoluteMaskedInt32x16 OpAddInt32x16 + OpAddMaskedInt32x16 OpAndInt32x16 + OpAndMaskedInt32x16 OpAndNotInt32x16 + OpAndNotMaskedInt32x16 OpEqualInt32x16 + OpEqualMaskedInt32x16 OpGreaterInt32x16 OpGreaterEqualInt32x16 + OpGreaterEqualMaskedInt32x16 + OpGreaterMaskedInt32x16 OpLessInt32x16 OpLessEqualInt32x16 - OpMaskedAbsoluteInt32x16 - OpMaskedAddInt32x16 - OpMaskedAndInt32x16 - OpMaskedAndNotInt32x16 - OpMaskedEqualInt32x16 - OpMaskedGreaterInt32x16 - OpMaskedGreaterEqualInt32x16 - OpMaskedLessInt32x16 - OpMaskedLessEqualInt32x16 - OpMaskedMaxInt32x16 - OpMaskedMinInt32x16 - OpMaskedMulLowInt32x16 - OpMaskedNotEqualInt32x16 - OpMaskedOrInt32x16 - OpMaskedPairDotProdAccumulateInt32x16 - OpMaskedPopCountInt32x16 - OpMaskedRotateLeftInt32x16 - OpMaskedRotateRightInt32x16 - OpMaskedSaturatedPairDotProdAccumulateInt32x16 - OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 - OpMaskedShiftLeftInt32x16 - OpMaskedShiftLeftAndFillUpperFromInt32x16 - OpMaskedShiftRightInt32x16 - OpMaskedShiftRightAndFillUpperFromInt32x16 - OpMaskedShiftRightSignExtendedInt32x16 - OpMaskedSubInt32x16 - OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16 - OpMaskedXorInt32x16 + OpLessEqualMaskedInt32x16 + OpLessMaskedInt32x16 OpMaxInt32x16 + OpMaxMaskedInt32x16 OpMinInt32x16 + OpMinMaskedInt32x16 OpMulLowInt32x16 + OpMulLowMaskedInt32x16 OpNotEqualInt32x16 + OpNotEqualMaskedInt32x16 OpOrInt32x16 + OpOrMaskedInt32x16 OpPairDotProdAccumulateInt32x16 + OpPairDotProdAccumulateMaskedInt32x16 OpPopCountInt32x16 + OpPopCountMaskedInt32x16 OpRotateLeftInt32x16 + OpRotateLeftMaskedInt32x16 OpRotateRightInt32x16 + OpRotateRightMaskedInt32x16 OpSaturatedPairDotProdAccumulateInt32x16 + OpSaturatedPairDotProdAccumulateMaskedInt32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 + OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 OpShiftLeftInt32x16 OpShiftLeftAndFillUpperFromInt32x16 + OpShiftLeftAndFillUpperFromMaskedInt32x16 + OpShiftLeftMaskedInt32x16 OpShiftRightInt32x16 OpShiftRightAndFillUpperFromInt32x16 + OpShiftRightAndFillUpperFromMaskedInt32x16 + OpShiftRightMaskedInt32x16 OpShiftRightSignExtendedInt32x16 + OpShiftRightSignExtendedMaskedInt32x16 OpSubInt32x16 + OpSubMaskedInt32x16 OpUnsignedSignedQuadDotProdAccumulateInt32x16 + OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 OpXorInt32x16 + OpXorMaskedInt32x16 OpAbsoluteInt32x4 + OpAbsoluteMaskedInt32x4 OpAddInt32x4 + OpAddMaskedInt32x4 OpAndInt32x4 + OpAndMaskedInt32x4 OpAndNotInt32x4 + OpAndNotMaskedInt32x4 OpEqualInt32x4 + OpEqualMaskedInt32x4 OpGreaterInt32x4 OpGreaterEqualInt32x4 + OpGreaterEqualMaskedInt32x4 + OpGreaterMaskedInt32x4 OpLessInt32x4 OpLessEqualInt32x4 - OpMaskedAbsoluteInt32x4 - OpMaskedAddInt32x4 - OpMaskedAndInt32x4 - OpMaskedAndNotInt32x4 - OpMaskedEqualInt32x4 - OpMaskedGreaterInt32x4 - OpMaskedGreaterEqualInt32x4 - OpMaskedLessInt32x4 - OpMaskedLessEqualInt32x4 - OpMaskedMaxInt32x4 - OpMaskedMinInt32x4 - OpMaskedMulLowInt32x4 - OpMaskedNotEqualInt32x4 - OpMaskedOrInt32x4 - OpMaskedPairDotProdAccumulateInt32x4 - OpMaskedPopCountInt32x4 - OpMaskedRotateLeftInt32x4 - OpMaskedRotateRightInt32x4 - OpMaskedSaturatedPairDotProdAccumulateInt32x4 - OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 - OpMaskedShiftLeftInt32x4 - OpMaskedShiftLeftAndFillUpperFromInt32x4 - 
OpMaskedShiftRightInt32x4 - OpMaskedShiftRightAndFillUpperFromInt32x4 - OpMaskedShiftRightSignExtendedInt32x4 - OpMaskedSubInt32x4 - OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4 - OpMaskedXorInt32x4 + OpLessEqualMaskedInt32x4 + OpLessMaskedInt32x4 OpMaxInt32x4 + OpMaxMaskedInt32x4 OpMinInt32x4 + OpMinMaskedInt32x4 OpMulEvenWidenInt32x4 OpMulLowInt32x4 + OpMulLowMaskedInt32x4 OpNotEqualInt32x4 + OpNotEqualMaskedInt32x4 OpOrInt32x4 + OpOrMaskedInt32x4 OpPairDotProdAccumulateInt32x4 + OpPairDotProdAccumulateMaskedInt32x4 OpPairwiseAddInt32x4 OpPairwiseSubInt32x4 OpPopCountInt32x4 + OpPopCountMaskedInt32x4 OpRotateLeftInt32x4 + OpRotateLeftMaskedInt32x4 OpRotateRightInt32x4 + OpRotateRightMaskedInt32x4 OpSaturatedPairDotProdAccumulateInt32x4 + OpSaturatedPairDotProdAccumulateMaskedInt32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 + OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 OpShiftAllLeftInt32x4 OpShiftAllRightInt32x4 OpShiftAllRightSignExtendedInt32x4 OpShiftLeftInt32x4 OpShiftLeftAndFillUpperFromInt32x4 + OpShiftLeftAndFillUpperFromMaskedInt32x4 + OpShiftLeftMaskedInt32x4 OpShiftRightInt32x4 OpShiftRightAndFillUpperFromInt32x4 + OpShiftRightAndFillUpperFromMaskedInt32x4 + OpShiftRightMaskedInt32x4 OpShiftRightSignExtendedInt32x4 + OpShiftRightSignExtendedMaskedInt32x4 OpSignInt32x4 OpSubInt32x4 + OpSubMaskedInt32x4 OpUnsignedSignedQuadDotProdAccumulateInt32x4 + OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 OpXorInt32x4 + OpXorMaskedInt32x4 OpAbsoluteInt32x8 + OpAbsoluteMaskedInt32x8 OpAddInt32x8 + OpAddMaskedInt32x8 OpAndInt32x8 + OpAndMaskedInt32x8 OpAndNotInt32x8 + OpAndNotMaskedInt32x8 OpEqualInt32x8 + OpEqualMaskedInt32x8 OpGreaterInt32x8 OpGreaterEqualInt32x8 + OpGreaterEqualMaskedInt32x8 + OpGreaterMaskedInt32x8 OpLessInt32x8 OpLessEqualInt32x8 - OpMaskedAbsoluteInt32x8 - OpMaskedAddInt32x8 - OpMaskedAndInt32x8 - OpMaskedAndNotInt32x8 - OpMaskedEqualInt32x8 - OpMaskedGreaterInt32x8 - OpMaskedGreaterEqualInt32x8 - OpMaskedLessInt32x8 - OpMaskedLessEqualInt32x8 - OpMaskedMaxInt32x8 - OpMaskedMinInt32x8 - OpMaskedMulLowInt32x8 - OpMaskedNotEqualInt32x8 - OpMaskedOrInt32x8 - OpMaskedPairDotProdAccumulateInt32x8 - OpMaskedPopCountInt32x8 - OpMaskedRotateLeftInt32x8 - OpMaskedRotateRightInt32x8 - OpMaskedSaturatedPairDotProdAccumulateInt32x8 - OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 - OpMaskedShiftLeftInt32x8 - OpMaskedShiftLeftAndFillUpperFromInt32x8 - OpMaskedShiftRightInt32x8 - OpMaskedShiftRightAndFillUpperFromInt32x8 - OpMaskedShiftRightSignExtendedInt32x8 - OpMaskedSubInt32x8 - OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8 - OpMaskedXorInt32x8 + OpLessEqualMaskedInt32x8 + OpLessMaskedInt32x8 OpMaxInt32x8 + OpMaxMaskedInt32x8 OpMinInt32x8 + OpMinMaskedInt32x8 OpMulEvenWidenInt32x8 OpMulLowInt32x8 + OpMulLowMaskedInt32x8 OpNotEqualInt32x8 + OpNotEqualMaskedInt32x8 OpOrInt32x8 + OpOrMaskedInt32x8 OpPairDotProdAccumulateInt32x8 + OpPairDotProdAccumulateMaskedInt32x8 OpPairwiseAddInt32x8 OpPairwiseSubInt32x8 OpPopCountInt32x8 + OpPopCountMaskedInt32x8 OpRotateLeftInt32x8 + OpRotateLeftMaskedInt32x8 OpRotateRightInt32x8 + OpRotateRightMaskedInt32x8 OpSaturatedPairDotProdAccumulateInt32x8 + OpSaturatedPairDotProdAccumulateMaskedInt32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 + OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 OpShiftAllLeftInt32x8 OpShiftAllRightInt32x8 OpShiftAllRightSignExtendedInt32x8 OpShiftLeftInt32x8 OpShiftLeftAndFillUpperFromInt32x8 + OpShiftLeftAndFillUpperFromMaskedInt32x8 + 
OpShiftLeftMaskedInt32x8 OpShiftRightInt32x8 OpShiftRightAndFillUpperFromInt32x8 + OpShiftRightAndFillUpperFromMaskedInt32x8 + OpShiftRightMaskedInt32x8 OpShiftRightSignExtendedInt32x8 + OpShiftRightSignExtendedMaskedInt32x8 OpSignInt32x8 OpSubInt32x8 + OpSubMaskedInt32x8 OpUnsignedSignedQuadDotProdAccumulateInt32x8 + OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 OpXorInt32x8 + OpXorMaskedInt32x8 OpAbsoluteInt64x2 + OpAbsoluteMaskedInt64x2 OpAddInt64x2 + OpAddMaskedInt64x2 OpAndInt64x2 + OpAndMaskedInt64x2 OpAndNotInt64x2 + OpAndNotMaskedInt64x2 OpEqualInt64x2 + OpEqualMaskedInt64x2 OpGreaterInt64x2 OpGreaterEqualInt64x2 + OpGreaterEqualMaskedInt64x2 + OpGreaterMaskedInt64x2 OpLessInt64x2 OpLessEqualInt64x2 - OpMaskedAbsoluteInt64x2 - OpMaskedAddInt64x2 - OpMaskedAndInt64x2 - OpMaskedAndNotInt64x2 - OpMaskedEqualInt64x2 - OpMaskedGreaterInt64x2 - OpMaskedGreaterEqualInt64x2 - OpMaskedLessInt64x2 - OpMaskedLessEqualInt64x2 - OpMaskedMaxInt64x2 - OpMaskedMinInt64x2 - OpMaskedMulEvenWidenInt64x2 - OpMaskedMulLowInt64x2 - OpMaskedNotEqualInt64x2 - OpMaskedOrInt64x2 - OpMaskedPopCountInt64x2 - OpMaskedRotateLeftInt64x2 - OpMaskedRotateRightInt64x2 - OpMaskedShiftAllLeftInt64x2 - OpMaskedShiftAllRightInt64x2 - OpMaskedShiftAllRightSignExtendedInt64x2 - OpMaskedShiftLeftInt64x2 - OpMaskedShiftLeftAndFillUpperFromInt64x2 - OpMaskedShiftRightInt64x2 - OpMaskedShiftRightAndFillUpperFromInt64x2 - OpMaskedShiftRightSignExtendedInt64x2 - OpMaskedSubInt64x2 - OpMaskedXorInt64x2 + OpLessEqualMaskedInt64x2 + OpLessMaskedInt64x2 OpMaxInt64x2 + OpMaxMaskedInt64x2 OpMinInt64x2 + OpMinMaskedInt64x2 OpMulEvenWidenInt64x2 + OpMulEvenWidenMaskedInt64x2 OpMulLowInt64x2 + OpMulLowMaskedInt64x2 OpNotEqualInt64x2 + OpNotEqualMaskedInt64x2 OpOrInt64x2 + OpOrMaskedInt64x2 OpPopCountInt64x2 + OpPopCountMaskedInt64x2 OpRotateLeftInt64x2 + OpRotateLeftMaskedInt64x2 OpRotateRightInt64x2 + OpRotateRightMaskedInt64x2 OpShiftAllLeftInt64x2 + OpShiftAllLeftMaskedInt64x2 OpShiftAllRightInt64x2 + OpShiftAllRightMaskedInt64x2 OpShiftAllRightSignExtendedInt64x2 + OpShiftAllRightSignExtendedMaskedInt64x2 OpShiftLeftInt64x2 OpShiftLeftAndFillUpperFromInt64x2 + OpShiftLeftAndFillUpperFromMaskedInt64x2 + OpShiftLeftMaskedInt64x2 OpShiftRightInt64x2 OpShiftRightAndFillUpperFromInt64x2 + OpShiftRightAndFillUpperFromMaskedInt64x2 + OpShiftRightMaskedInt64x2 OpShiftRightSignExtendedInt64x2 + OpShiftRightSignExtendedMaskedInt64x2 OpSubInt64x2 + OpSubMaskedInt64x2 OpXorInt64x2 + OpXorMaskedInt64x2 OpAbsoluteInt64x4 + OpAbsoluteMaskedInt64x4 OpAddInt64x4 + OpAddMaskedInt64x4 OpAndInt64x4 + OpAndMaskedInt64x4 OpAndNotInt64x4 + OpAndNotMaskedInt64x4 OpEqualInt64x4 + OpEqualMaskedInt64x4 OpGreaterInt64x4 OpGreaterEqualInt64x4 + OpGreaterEqualMaskedInt64x4 + OpGreaterMaskedInt64x4 OpLessInt64x4 OpLessEqualInt64x4 - OpMaskedAbsoluteInt64x4 - OpMaskedAddInt64x4 - OpMaskedAndInt64x4 - OpMaskedAndNotInt64x4 - OpMaskedEqualInt64x4 - OpMaskedGreaterInt64x4 - OpMaskedGreaterEqualInt64x4 - OpMaskedLessInt64x4 - OpMaskedLessEqualInt64x4 - OpMaskedMaxInt64x4 - OpMaskedMinInt64x4 - OpMaskedMulEvenWidenInt64x4 - OpMaskedMulLowInt64x4 - OpMaskedNotEqualInt64x4 - OpMaskedOrInt64x4 - OpMaskedPopCountInt64x4 - OpMaskedRotateLeftInt64x4 - OpMaskedRotateRightInt64x4 - OpMaskedShiftAllLeftInt64x4 - OpMaskedShiftAllRightInt64x4 - OpMaskedShiftAllRightSignExtendedInt64x4 - OpMaskedShiftLeftInt64x4 - OpMaskedShiftLeftAndFillUpperFromInt64x4 - OpMaskedShiftRightInt64x4 - OpMaskedShiftRightAndFillUpperFromInt64x4 - OpMaskedShiftRightSignExtendedInt64x4 - 
OpMaskedSubInt64x4 - OpMaskedXorInt64x4 + OpLessEqualMaskedInt64x4 + OpLessMaskedInt64x4 OpMaxInt64x4 + OpMaxMaskedInt64x4 OpMinInt64x4 + OpMinMaskedInt64x4 OpMulEvenWidenInt64x4 + OpMulEvenWidenMaskedInt64x4 OpMulLowInt64x4 + OpMulLowMaskedInt64x4 OpNotEqualInt64x4 + OpNotEqualMaskedInt64x4 OpOrInt64x4 + OpOrMaskedInt64x4 OpPopCountInt64x4 + OpPopCountMaskedInt64x4 OpRotateLeftInt64x4 + OpRotateLeftMaskedInt64x4 OpRotateRightInt64x4 + OpRotateRightMaskedInt64x4 OpShiftAllLeftInt64x4 + OpShiftAllLeftMaskedInt64x4 OpShiftAllRightInt64x4 + OpShiftAllRightMaskedInt64x4 OpShiftAllRightSignExtendedInt64x4 + OpShiftAllRightSignExtendedMaskedInt64x4 OpShiftLeftInt64x4 OpShiftLeftAndFillUpperFromInt64x4 + OpShiftLeftAndFillUpperFromMaskedInt64x4 + OpShiftLeftMaskedInt64x4 OpShiftRightInt64x4 OpShiftRightAndFillUpperFromInt64x4 + OpShiftRightAndFillUpperFromMaskedInt64x4 + OpShiftRightMaskedInt64x4 OpShiftRightSignExtendedInt64x4 + OpShiftRightSignExtendedMaskedInt64x4 OpSubInt64x4 + OpSubMaskedInt64x4 OpXorInt64x4 + OpXorMaskedInt64x4 OpAbsoluteInt64x8 + OpAbsoluteMaskedInt64x8 OpAddInt64x8 + OpAddMaskedInt64x8 OpAndInt64x8 + OpAndMaskedInt64x8 OpAndNotInt64x8 + OpAndNotMaskedInt64x8 OpEqualInt64x8 + OpEqualMaskedInt64x8 OpGreaterInt64x8 OpGreaterEqualInt64x8 + OpGreaterEqualMaskedInt64x8 + OpGreaterMaskedInt64x8 OpLessInt64x8 OpLessEqualInt64x8 - OpMaskedAbsoluteInt64x8 - OpMaskedAddInt64x8 - OpMaskedAndInt64x8 - OpMaskedAndNotInt64x8 - OpMaskedEqualInt64x8 - OpMaskedGreaterInt64x8 - OpMaskedGreaterEqualInt64x8 - OpMaskedLessInt64x8 - OpMaskedLessEqualInt64x8 - OpMaskedMaxInt64x8 - OpMaskedMinInt64x8 - OpMaskedMulEvenWidenInt64x8 - OpMaskedMulLowInt64x8 - OpMaskedNotEqualInt64x8 - OpMaskedOrInt64x8 - OpMaskedPopCountInt64x8 - OpMaskedRotateLeftInt64x8 - OpMaskedRotateRightInt64x8 - OpMaskedShiftAllLeftInt64x8 - OpMaskedShiftAllRightInt64x8 - OpMaskedShiftAllRightSignExtendedInt64x8 - OpMaskedShiftLeftInt64x8 - OpMaskedShiftLeftAndFillUpperFromInt64x8 - OpMaskedShiftRightInt64x8 - OpMaskedShiftRightAndFillUpperFromInt64x8 - OpMaskedShiftRightSignExtendedInt64x8 - OpMaskedSubInt64x8 - OpMaskedXorInt64x8 + OpLessEqualMaskedInt64x8 + OpLessMaskedInt64x8 OpMaxInt64x8 + OpMaxMaskedInt64x8 OpMinInt64x8 + OpMinMaskedInt64x8 OpMulEvenWidenInt64x8 + OpMulEvenWidenMaskedInt64x8 OpMulLowInt64x8 + OpMulLowMaskedInt64x8 OpNotEqualInt64x8 + OpNotEqualMaskedInt64x8 OpOrInt64x8 + OpOrMaskedInt64x8 OpPopCountInt64x8 + OpPopCountMaskedInt64x8 OpRotateLeftInt64x8 + OpRotateLeftMaskedInt64x8 OpRotateRightInt64x8 + OpRotateRightMaskedInt64x8 OpShiftAllLeftInt64x8 + OpShiftAllLeftMaskedInt64x8 OpShiftAllRightInt64x8 + OpShiftAllRightMaskedInt64x8 OpShiftAllRightSignExtendedInt64x8 + OpShiftAllRightSignExtendedMaskedInt64x8 OpShiftLeftInt64x8 OpShiftLeftAndFillUpperFromInt64x8 + OpShiftLeftAndFillUpperFromMaskedInt64x8 + OpShiftLeftMaskedInt64x8 OpShiftRightInt64x8 OpShiftRightAndFillUpperFromInt64x8 + OpShiftRightAndFillUpperFromMaskedInt64x8 + OpShiftRightMaskedInt64x8 OpShiftRightSignExtendedInt64x8 + OpShiftRightSignExtendedMaskedInt64x8 OpSubInt64x8 + OpSubMaskedInt64x8 OpXorInt64x8 + OpXorMaskedInt64x8 OpAbsoluteInt8x16 + OpAbsoluteMaskedInt8x16 OpAddInt8x16 + OpAddMaskedInt8x16 OpAndInt8x16 OpAndNotInt8x16 OpEqualInt8x16 + OpEqualMaskedInt8x16 OpGreaterInt8x16 OpGreaterEqualInt8x16 + OpGreaterEqualMaskedInt8x16 + OpGreaterMaskedInt8x16 OpLessInt8x16 OpLessEqualInt8x16 - OpMaskedAbsoluteInt8x16 - OpMaskedAddInt8x16 - OpMaskedEqualInt8x16 - OpMaskedGreaterInt8x16 - OpMaskedGreaterEqualInt8x16 - OpMaskedLessInt8x16 
- OpMaskedLessEqualInt8x16 - OpMaskedMaxInt8x16 - OpMaskedMinInt8x16 - OpMaskedNotEqualInt8x16 - OpMaskedPopCountInt8x16 - OpMaskedSaturatedAddInt8x16 - OpMaskedSaturatedSubInt8x16 - OpMaskedSubInt8x16 + OpLessEqualMaskedInt8x16 + OpLessMaskedInt8x16 OpMaxInt8x16 + OpMaxMaskedInt8x16 OpMinInt8x16 + OpMinMaskedInt8x16 OpNotEqualInt8x16 + OpNotEqualMaskedInt8x16 OpOrInt8x16 OpPopCountInt8x16 + OpPopCountMaskedInt8x16 OpSaturatedAddInt8x16 + OpSaturatedAddMaskedInt8x16 OpSaturatedSubInt8x16 + OpSaturatedSubMaskedInt8x16 OpSignInt8x16 OpSubInt8x16 + OpSubMaskedInt8x16 OpXorInt8x16 OpAbsoluteInt8x32 + OpAbsoluteMaskedInt8x32 OpAddInt8x32 + OpAddMaskedInt8x32 OpAndInt8x32 OpAndNotInt8x32 OpEqualInt8x32 + OpEqualMaskedInt8x32 OpGreaterInt8x32 OpGreaterEqualInt8x32 + OpGreaterEqualMaskedInt8x32 + OpGreaterMaskedInt8x32 OpLessInt8x32 OpLessEqualInt8x32 - OpMaskedAbsoluteInt8x32 - OpMaskedAddInt8x32 - OpMaskedEqualInt8x32 - OpMaskedGreaterInt8x32 - OpMaskedGreaterEqualInt8x32 - OpMaskedLessInt8x32 - OpMaskedLessEqualInt8x32 - OpMaskedMaxInt8x32 - OpMaskedMinInt8x32 - OpMaskedNotEqualInt8x32 - OpMaskedPopCountInt8x32 - OpMaskedSaturatedAddInt8x32 - OpMaskedSaturatedSubInt8x32 - OpMaskedSubInt8x32 + OpLessEqualMaskedInt8x32 + OpLessMaskedInt8x32 OpMaxInt8x32 + OpMaxMaskedInt8x32 OpMinInt8x32 + OpMinMaskedInt8x32 OpNotEqualInt8x32 + OpNotEqualMaskedInt8x32 OpOrInt8x32 OpPopCountInt8x32 + OpPopCountMaskedInt8x32 OpSaturatedAddInt8x32 + OpSaturatedAddMaskedInt8x32 OpSaturatedSubInt8x32 + OpSaturatedSubMaskedInt8x32 OpSignInt8x32 OpSubInt8x32 + OpSubMaskedInt8x32 OpXorInt8x32 OpAbsoluteInt8x64 + OpAbsoluteMaskedInt8x64 OpAddInt8x64 + OpAddMaskedInt8x64 OpEqualInt8x64 + OpEqualMaskedInt8x64 OpGreaterInt8x64 OpGreaterEqualInt8x64 + OpGreaterEqualMaskedInt8x64 + OpGreaterMaskedInt8x64 OpLessInt8x64 OpLessEqualInt8x64 - OpMaskedAbsoluteInt8x64 - OpMaskedAddInt8x64 - OpMaskedEqualInt8x64 - OpMaskedGreaterInt8x64 - OpMaskedGreaterEqualInt8x64 - OpMaskedLessInt8x64 - OpMaskedLessEqualInt8x64 - OpMaskedMaxInt8x64 - OpMaskedMinInt8x64 - OpMaskedNotEqualInt8x64 - OpMaskedPopCountInt8x64 - OpMaskedSaturatedAddInt8x64 - OpMaskedSaturatedSubInt8x64 - OpMaskedSubInt8x64 + OpLessEqualMaskedInt8x64 + OpLessMaskedInt8x64 OpMaxInt8x64 + OpMaxMaskedInt8x64 OpMinInt8x64 + OpMinMaskedInt8x64 OpNotEqualInt8x64 + OpNotEqualMaskedInt8x64 OpPopCountInt8x64 + OpPopCountMaskedInt8x64 OpSaturatedAddInt8x64 + OpSaturatedAddMaskedInt8x64 OpSaturatedSubInt8x64 + OpSaturatedSubMaskedInt8x64 OpSubInt8x64 + OpSubMaskedInt8x64 OpAddUint16x16 + OpAddMaskedUint16x16 OpAndUint16x16 OpAndNotUint16x16 OpAverageUint16x16 + OpAverageMaskedUint16x16 OpEqualUint16x16 + OpEqualMaskedUint16x16 OpGreaterUint16x16 OpGreaterEqualUint16x16 + OpGreaterEqualMaskedUint16x16 + OpGreaterMaskedUint16x16 OpLessUint16x16 OpLessEqualUint16x16 - OpMaskedAddUint16x16 - OpMaskedAverageUint16x16 - OpMaskedEqualUint16x16 - OpMaskedGreaterUint16x16 - OpMaskedGreaterEqualUint16x16 - OpMaskedLessUint16x16 - OpMaskedLessEqualUint16x16 - OpMaskedMaxUint16x16 - OpMaskedMinUint16x16 - OpMaskedMulHighUint16x16 - OpMaskedNotEqualUint16x16 - OpMaskedPopCountUint16x16 - OpMaskedSaturatedAddUint16x16 - OpMaskedSaturatedSubUint16x16 - OpMaskedShiftLeftUint16x16 - OpMaskedShiftLeftAndFillUpperFromUint16x16 - OpMaskedShiftRightUint16x16 - OpMaskedShiftRightAndFillUpperFromUint16x16 - OpMaskedShiftRightSignExtendedUint16x16 - OpMaskedSubUint16x16 + OpLessEqualMaskedUint16x16 + OpLessMaskedUint16x16 OpMaxUint16x16 + OpMaxMaskedUint16x16 OpMinUint16x16 + OpMinMaskedUint16x16 
OpMulHighUint16x16 + OpMulHighMaskedUint16x16 OpNotEqualUint16x16 + OpNotEqualMaskedUint16x16 OpOrUint16x16 OpPairwiseAddUint16x16 OpPairwiseSubUint16x16 OpPopCountUint16x16 + OpPopCountMaskedUint16x16 OpSaturatedAddUint16x16 + OpSaturatedAddMaskedUint16x16 OpSaturatedSubUint16x16 + OpSaturatedSubMaskedUint16x16 OpShiftAllLeftUint16x16 OpShiftAllRightUint16x16 OpShiftLeftUint16x16 OpShiftLeftAndFillUpperFromUint16x16 + OpShiftLeftAndFillUpperFromMaskedUint16x16 + OpShiftLeftMaskedUint16x16 OpShiftRightUint16x16 OpShiftRightAndFillUpperFromUint16x16 + OpShiftRightAndFillUpperFromMaskedUint16x16 + OpShiftRightMaskedUint16x16 OpShiftRightSignExtendedUint16x16 + OpShiftRightSignExtendedMaskedUint16x16 OpSubUint16x16 + OpSubMaskedUint16x16 OpXorUint16x16 OpAddUint16x32 + OpAddMaskedUint16x32 OpAverageUint16x32 + OpAverageMaskedUint16x32 OpEqualUint16x32 + OpEqualMaskedUint16x32 OpGreaterUint16x32 OpGreaterEqualUint16x32 + OpGreaterEqualMaskedUint16x32 + OpGreaterMaskedUint16x32 OpLessUint16x32 OpLessEqualUint16x32 - OpMaskedAddUint16x32 - OpMaskedAverageUint16x32 - OpMaskedEqualUint16x32 - OpMaskedGreaterUint16x32 - OpMaskedGreaterEqualUint16x32 - OpMaskedLessUint16x32 - OpMaskedLessEqualUint16x32 - OpMaskedMaxUint16x32 - OpMaskedMinUint16x32 - OpMaskedMulHighUint16x32 - OpMaskedNotEqualUint16x32 - OpMaskedPopCountUint16x32 - OpMaskedSaturatedAddUint16x32 - OpMaskedSaturatedSubUint16x32 - OpMaskedShiftLeftUint16x32 - OpMaskedShiftLeftAndFillUpperFromUint16x32 - OpMaskedShiftRightUint16x32 - OpMaskedShiftRightAndFillUpperFromUint16x32 - OpMaskedShiftRightSignExtendedUint16x32 - OpMaskedSubUint16x32 + OpLessEqualMaskedUint16x32 + OpLessMaskedUint16x32 OpMaxUint16x32 + OpMaxMaskedUint16x32 OpMinUint16x32 + OpMinMaskedUint16x32 OpMulHighUint16x32 + OpMulHighMaskedUint16x32 OpNotEqualUint16x32 + OpNotEqualMaskedUint16x32 OpPopCountUint16x32 + OpPopCountMaskedUint16x32 OpSaturatedAddUint16x32 + OpSaturatedAddMaskedUint16x32 OpSaturatedSubUint16x32 + OpSaturatedSubMaskedUint16x32 OpShiftLeftUint16x32 OpShiftLeftAndFillUpperFromUint16x32 + OpShiftLeftAndFillUpperFromMaskedUint16x32 + OpShiftLeftMaskedUint16x32 OpShiftRightUint16x32 OpShiftRightAndFillUpperFromUint16x32 + OpShiftRightAndFillUpperFromMaskedUint16x32 + OpShiftRightMaskedUint16x32 OpShiftRightSignExtendedUint16x32 + OpShiftRightSignExtendedMaskedUint16x32 OpSubUint16x32 + OpSubMaskedUint16x32 OpAddUint16x8 + OpAddMaskedUint16x8 OpAndUint16x8 OpAndNotUint16x8 OpAverageUint16x8 + OpAverageMaskedUint16x8 OpEqualUint16x8 + OpEqualMaskedUint16x8 OpGreaterUint16x8 OpGreaterEqualUint16x8 + OpGreaterEqualMaskedUint16x8 + OpGreaterMaskedUint16x8 OpLessUint16x8 OpLessEqualUint16x8 - OpMaskedAddUint16x8 - OpMaskedAverageUint16x8 - OpMaskedEqualUint16x8 - OpMaskedGreaterUint16x8 - OpMaskedGreaterEqualUint16x8 - OpMaskedLessUint16x8 - OpMaskedLessEqualUint16x8 - OpMaskedMaxUint16x8 - OpMaskedMinUint16x8 - OpMaskedMulHighUint16x8 - OpMaskedNotEqualUint16x8 - OpMaskedPopCountUint16x8 - OpMaskedSaturatedAddUint16x8 - OpMaskedSaturatedSubUint16x8 - OpMaskedShiftLeftUint16x8 - OpMaskedShiftLeftAndFillUpperFromUint16x8 - OpMaskedShiftRightUint16x8 - OpMaskedShiftRightAndFillUpperFromUint16x8 - OpMaskedShiftRightSignExtendedUint16x8 - OpMaskedSubUint16x8 + OpLessEqualMaskedUint16x8 + OpLessMaskedUint16x8 OpMaxUint16x8 + OpMaxMaskedUint16x8 OpMinUint16x8 + OpMinMaskedUint16x8 OpMulHighUint16x8 + OpMulHighMaskedUint16x8 OpNotEqualUint16x8 + OpNotEqualMaskedUint16x8 OpOrUint16x8 OpPairwiseAddUint16x8 OpPairwiseSubUint16x8 OpPopCountUint16x8 + 
OpPopCountMaskedUint16x8 OpSaturatedAddUint16x8 + OpSaturatedAddMaskedUint16x8 OpSaturatedSubUint16x8 + OpSaturatedSubMaskedUint16x8 OpShiftAllLeftUint16x8 OpShiftAllRightUint16x8 OpShiftLeftUint16x8 OpShiftLeftAndFillUpperFromUint16x8 + OpShiftLeftAndFillUpperFromMaskedUint16x8 + OpShiftLeftMaskedUint16x8 OpShiftRightUint16x8 OpShiftRightAndFillUpperFromUint16x8 + OpShiftRightAndFillUpperFromMaskedUint16x8 + OpShiftRightMaskedUint16x8 OpShiftRightSignExtendedUint16x8 + OpShiftRightSignExtendedMaskedUint16x8 OpSubUint16x8 + OpSubMaskedUint16x8 OpXorUint16x8 OpAddUint32x16 + OpAddMaskedUint32x16 OpAndUint32x16 + OpAndMaskedUint32x16 OpAndNotUint32x16 + OpAndNotMaskedUint32x16 OpEqualUint32x16 + OpEqualMaskedUint32x16 OpGreaterUint32x16 OpGreaterEqualUint32x16 + OpGreaterEqualMaskedUint32x16 + OpGreaterMaskedUint32x16 OpLessUint32x16 OpLessEqualUint32x16 - OpMaskedAddUint32x16 - OpMaskedAndUint32x16 - OpMaskedAndNotUint32x16 - OpMaskedEqualUint32x16 - OpMaskedGreaterUint32x16 - OpMaskedGreaterEqualUint32x16 - OpMaskedLessUint32x16 - OpMaskedLessEqualUint32x16 - OpMaskedMaxUint32x16 - OpMaskedMinUint32x16 - OpMaskedNotEqualUint32x16 - OpMaskedOrUint32x16 - OpMaskedPopCountUint32x16 - OpMaskedRotateLeftUint32x16 - OpMaskedRotateRightUint32x16 - OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 - OpMaskedShiftLeftUint32x16 - OpMaskedShiftLeftAndFillUpperFromUint32x16 - OpMaskedShiftRightUint32x16 - OpMaskedShiftRightAndFillUpperFromUint32x16 - OpMaskedShiftRightSignExtendedUint32x16 - OpMaskedSubUint32x16 - OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16 - OpMaskedXorUint32x16 + OpLessEqualMaskedUint32x16 + OpLessMaskedUint32x16 OpMaxUint32x16 + OpMaxMaskedUint32x16 OpMinUint32x16 + OpMinMaskedUint32x16 OpNotEqualUint32x16 + OpNotEqualMaskedUint32x16 OpOrUint32x16 + OpOrMaskedUint32x16 OpPopCountUint32x16 + OpPopCountMaskedUint32x16 OpRotateLeftUint32x16 + OpRotateLeftMaskedUint32x16 OpRotateRightUint32x16 + OpRotateRightMaskedUint32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 + OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 OpShiftLeftUint32x16 OpShiftLeftAndFillUpperFromUint32x16 + OpShiftLeftAndFillUpperFromMaskedUint32x16 + OpShiftLeftMaskedUint32x16 OpShiftRightUint32x16 OpShiftRightAndFillUpperFromUint32x16 + OpShiftRightAndFillUpperFromMaskedUint32x16 + OpShiftRightMaskedUint32x16 OpShiftRightSignExtendedUint32x16 + OpShiftRightSignExtendedMaskedUint32x16 OpSubUint32x16 + OpSubMaskedUint32x16 OpUnsignedSignedQuadDotProdAccumulateUint32x16 + OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 OpXorUint32x16 + OpXorMaskedUint32x16 OpAddUint32x4 + OpAddMaskedUint32x4 OpAndUint32x4 + OpAndMaskedUint32x4 OpAndNotUint32x4 + OpAndNotMaskedUint32x4 OpEqualUint32x4 + OpEqualMaskedUint32x4 OpGreaterUint32x4 OpGreaterEqualUint32x4 + OpGreaterEqualMaskedUint32x4 + OpGreaterMaskedUint32x4 OpLessUint32x4 OpLessEqualUint32x4 - OpMaskedAddUint32x4 - OpMaskedAndUint32x4 - OpMaskedAndNotUint32x4 - OpMaskedEqualUint32x4 - OpMaskedGreaterUint32x4 - OpMaskedGreaterEqualUint32x4 - OpMaskedLessUint32x4 - OpMaskedLessEqualUint32x4 - OpMaskedMaxUint32x4 - OpMaskedMinUint32x4 - OpMaskedNotEqualUint32x4 - OpMaskedOrUint32x4 - OpMaskedPopCountUint32x4 - OpMaskedRotateLeftUint32x4 - OpMaskedRotateRightUint32x4 - OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 - OpMaskedShiftLeftUint32x4 - OpMaskedShiftLeftAndFillUpperFromUint32x4 - OpMaskedShiftRightUint32x4 - OpMaskedShiftRightAndFillUpperFromUint32x4 - OpMaskedShiftRightSignExtendedUint32x4 - 
OpMaskedSubUint32x4 - OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4 - OpMaskedXorUint32x4 + OpLessEqualMaskedUint32x4 + OpLessMaskedUint32x4 OpMaxUint32x4 + OpMaxMaskedUint32x4 OpMinUint32x4 + OpMinMaskedUint32x4 OpMulEvenWidenUint32x4 OpNotEqualUint32x4 + OpNotEqualMaskedUint32x4 OpOrUint32x4 + OpOrMaskedUint32x4 OpPairwiseAddUint32x4 OpPairwiseSubUint32x4 OpPopCountUint32x4 + OpPopCountMaskedUint32x4 OpRotateLeftUint32x4 + OpRotateLeftMaskedUint32x4 OpRotateRightUint32x4 + OpRotateRightMaskedUint32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 + OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 OpShiftAllLeftUint32x4 OpShiftAllRightUint32x4 OpShiftLeftUint32x4 OpShiftLeftAndFillUpperFromUint32x4 + OpShiftLeftAndFillUpperFromMaskedUint32x4 + OpShiftLeftMaskedUint32x4 OpShiftRightUint32x4 OpShiftRightAndFillUpperFromUint32x4 + OpShiftRightAndFillUpperFromMaskedUint32x4 + OpShiftRightMaskedUint32x4 OpShiftRightSignExtendedUint32x4 + OpShiftRightSignExtendedMaskedUint32x4 OpSubUint32x4 + OpSubMaskedUint32x4 OpUnsignedSignedQuadDotProdAccumulateUint32x4 + OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 OpXorUint32x4 + OpXorMaskedUint32x4 OpAddUint32x8 + OpAddMaskedUint32x8 OpAndUint32x8 + OpAndMaskedUint32x8 OpAndNotUint32x8 + OpAndNotMaskedUint32x8 OpEqualUint32x8 + OpEqualMaskedUint32x8 OpGreaterUint32x8 OpGreaterEqualUint32x8 + OpGreaterEqualMaskedUint32x8 + OpGreaterMaskedUint32x8 OpLessUint32x8 OpLessEqualUint32x8 - OpMaskedAddUint32x8 - OpMaskedAndUint32x8 - OpMaskedAndNotUint32x8 - OpMaskedEqualUint32x8 - OpMaskedGreaterUint32x8 - OpMaskedGreaterEqualUint32x8 - OpMaskedLessUint32x8 - OpMaskedLessEqualUint32x8 - OpMaskedMaxUint32x8 - OpMaskedMinUint32x8 - OpMaskedNotEqualUint32x8 - OpMaskedOrUint32x8 - OpMaskedPopCountUint32x8 - OpMaskedRotateLeftUint32x8 - OpMaskedRotateRightUint32x8 - OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 - OpMaskedShiftLeftUint32x8 - OpMaskedShiftLeftAndFillUpperFromUint32x8 - OpMaskedShiftRightUint32x8 - OpMaskedShiftRightAndFillUpperFromUint32x8 - OpMaskedShiftRightSignExtendedUint32x8 - OpMaskedSubUint32x8 - OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8 - OpMaskedXorUint32x8 + OpLessEqualMaskedUint32x8 + OpLessMaskedUint32x8 OpMaxUint32x8 + OpMaxMaskedUint32x8 OpMinUint32x8 + OpMinMaskedUint32x8 OpMulEvenWidenUint32x8 OpNotEqualUint32x8 + OpNotEqualMaskedUint32x8 OpOrUint32x8 + OpOrMaskedUint32x8 OpPairwiseAddUint32x8 OpPairwiseSubUint32x8 OpPopCountUint32x8 + OpPopCountMaskedUint32x8 OpRotateLeftUint32x8 + OpRotateLeftMaskedUint32x8 OpRotateRightUint32x8 + OpRotateRightMaskedUint32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 + OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 OpShiftAllLeftUint32x8 OpShiftAllRightUint32x8 OpShiftLeftUint32x8 OpShiftLeftAndFillUpperFromUint32x8 + OpShiftLeftAndFillUpperFromMaskedUint32x8 + OpShiftLeftMaskedUint32x8 OpShiftRightUint32x8 OpShiftRightAndFillUpperFromUint32x8 + OpShiftRightAndFillUpperFromMaskedUint32x8 + OpShiftRightMaskedUint32x8 OpShiftRightSignExtendedUint32x8 + OpShiftRightSignExtendedMaskedUint32x8 OpSubUint32x8 + OpSubMaskedUint32x8 OpUnsignedSignedQuadDotProdAccumulateUint32x8 + OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 OpXorUint32x8 + OpXorMaskedUint32x8 OpAddUint64x2 + OpAddMaskedUint64x2 OpAndUint64x2 + OpAndMaskedUint64x2 OpAndNotUint64x2 + OpAndNotMaskedUint64x2 OpEqualUint64x2 + OpEqualMaskedUint64x2 OpGreaterUint64x2 OpGreaterEqualUint64x2 + OpGreaterEqualMaskedUint64x2 + OpGreaterMaskedUint64x2 OpLessUint64x2 
OpLessEqualUint64x2 - OpMaskedAddUint64x2 - OpMaskedAndUint64x2 - OpMaskedAndNotUint64x2 - OpMaskedEqualUint64x2 - OpMaskedGreaterUint64x2 - OpMaskedGreaterEqualUint64x2 - OpMaskedLessUint64x2 - OpMaskedLessEqualUint64x2 - OpMaskedMaxUint64x2 - OpMaskedMinUint64x2 - OpMaskedMulEvenWidenUint64x2 - OpMaskedNotEqualUint64x2 - OpMaskedOrUint64x2 - OpMaskedPopCountUint64x2 - OpMaskedRotateLeftUint64x2 - OpMaskedRotateRightUint64x2 - OpMaskedShiftAllLeftUint64x2 - OpMaskedShiftAllRightUint64x2 - OpMaskedShiftLeftUint64x2 - OpMaskedShiftLeftAndFillUpperFromUint64x2 - OpMaskedShiftRightUint64x2 - OpMaskedShiftRightAndFillUpperFromUint64x2 - OpMaskedShiftRightSignExtendedUint64x2 - OpMaskedSubUint64x2 - OpMaskedXorUint64x2 + OpLessEqualMaskedUint64x2 + OpLessMaskedUint64x2 OpMaxUint64x2 + OpMaxMaskedUint64x2 OpMinUint64x2 + OpMinMaskedUint64x2 OpMulEvenWidenUint64x2 + OpMulEvenWidenMaskedUint64x2 OpNotEqualUint64x2 + OpNotEqualMaskedUint64x2 OpOrUint64x2 + OpOrMaskedUint64x2 OpPopCountUint64x2 + OpPopCountMaskedUint64x2 OpRotateLeftUint64x2 + OpRotateLeftMaskedUint64x2 OpRotateRightUint64x2 + OpRotateRightMaskedUint64x2 OpShiftAllLeftUint64x2 + OpShiftAllLeftMaskedUint64x2 OpShiftAllRightUint64x2 + OpShiftAllRightMaskedUint64x2 OpShiftLeftUint64x2 OpShiftLeftAndFillUpperFromUint64x2 + OpShiftLeftAndFillUpperFromMaskedUint64x2 + OpShiftLeftMaskedUint64x2 OpShiftRightUint64x2 OpShiftRightAndFillUpperFromUint64x2 + OpShiftRightAndFillUpperFromMaskedUint64x2 + OpShiftRightMaskedUint64x2 OpShiftRightSignExtendedUint64x2 + OpShiftRightSignExtendedMaskedUint64x2 OpSubUint64x2 + OpSubMaskedUint64x2 OpXorUint64x2 + OpXorMaskedUint64x2 OpAddUint64x4 + OpAddMaskedUint64x4 OpAndUint64x4 + OpAndMaskedUint64x4 OpAndNotUint64x4 + OpAndNotMaskedUint64x4 OpEqualUint64x4 + OpEqualMaskedUint64x4 OpGreaterUint64x4 OpGreaterEqualUint64x4 + OpGreaterEqualMaskedUint64x4 + OpGreaterMaskedUint64x4 OpLessUint64x4 OpLessEqualUint64x4 - OpMaskedAddUint64x4 - OpMaskedAndUint64x4 - OpMaskedAndNotUint64x4 - OpMaskedEqualUint64x4 - OpMaskedGreaterUint64x4 - OpMaskedGreaterEqualUint64x4 - OpMaskedLessUint64x4 - OpMaskedLessEqualUint64x4 - OpMaskedMaxUint64x4 - OpMaskedMinUint64x4 - OpMaskedMulEvenWidenUint64x4 - OpMaskedNotEqualUint64x4 - OpMaskedOrUint64x4 - OpMaskedPopCountUint64x4 - OpMaskedRotateLeftUint64x4 - OpMaskedRotateRightUint64x4 - OpMaskedShiftAllLeftUint64x4 - OpMaskedShiftAllRightUint64x4 - OpMaskedShiftLeftUint64x4 - OpMaskedShiftLeftAndFillUpperFromUint64x4 - OpMaskedShiftRightUint64x4 - OpMaskedShiftRightAndFillUpperFromUint64x4 - OpMaskedShiftRightSignExtendedUint64x4 - OpMaskedSubUint64x4 - OpMaskedXorUint64x4 + OpLessEqualMaskedUint64x4 + OpLessMaskedUint64x4 OpMaxUint64x4 + OpMaxMaskedUint64x4 OpMinUint64x4 + OpMinMaskedUint64x4 OpMulEvenWidenUint64x4 + OpMulEvenWidenMaskedUint64x4 OpNotEqualUint64x4 + OpNotEqualMaskedUint64x4 OpOrUint64x4 + OpOrMaskedUint64x4 OpPopCountUint64x4 + OpPopCountMaskedUint64x4 OpRotateLeftUint64x4 + OpRotateLeftMaskedUint64x4 OpRotateRightUint64x4 + OpRotateRightMaskedUint64x4 OpShiftAllLeftUint64x4 + OpShiftAllLeftMaskedUint64x4 OpShiftAllRightUint64x4 + OpShiftAllRightMaskedUint64x4 OpShiftLeftUint64x4 OpShiftLeftAndFillUpperFromUint64x4 + OpShiftLeftAndFillUpperFromMaskedUint64x4 + OpShiftLeftMaskedUint64x4 OpShiftRightUint64x4 OpShiftRightAndFillUpperFromUint64x4 + OpShiftRightAndFillUpperFromMaskedUint64x4 + OpShiftRightMaskedUint64x4 OpShiftRightSignExtendedUint64x4 + OpShiftRightSignExtendedMaskedUint64x4 OpSubUint64x4 + OpSubMaskedUint64x4 OpXorUint64x4 + 
OpXorMaskedUint64x4 OpAddUint64x8 + OpAddMaskedUint64x8 OpAndUint64x8 + OpAndMaskedUint64x8 OpAndNotUint64x8 + OpAndNotMaskedUint64x8 OpEqualUint64x8 + OpEqualMaskedUint64x8 OpGreaterUint64x8 OpGreaterEqualUint64x8 + OpGreaterEqualMaskedUint64x8 + OpGreaterMaskedUint64x8 OpLessUint64x8 OpLessEqualUint64x8 - OpMaskedAddUint64x8 - OpMaskedAndUint64x8 - OpMaskedAndNotUint64x8 - OpMaskedEqualUint64x8 - OpMaskedGreaterUint64x8 - OpMaskedGreaterEqualUint64x8 - OpMaskedLessUint64x8 - OpMaskedLessEqualUint64x8 - OpMaskedMaxUint64x8 - OpMaskedMinUint64x8 - OpMaskedMulEvenWidenUint64x8 - OpMaskedNotEqualUint64x8 - OpMaskedOrUint64x8 - OpMaskedPopCountUint64x8 - OpMaskedRotateLeftUint64x8 - OpMaskedRotateRightUint64x8 - OpMaskedShiftAllLeftUint64x8 - OpMaskedShiftAllRightUint64x8 - OpMaskedShiftLeftUint64x8 - OpMaskedShiftLeftAndFillUpperFromUint64x8 - OpMaskedShiftRightUint64x8 - OpMaskedShiftRightAndFillUpperFromUint64x8 - OpMaskedShiftRightSignExtendedUint64x8 - OpMaskedSubUint64x8 - OpMaskedXorUint64x8 + OpLessEqualMaskedUint64x8 + OpLessMaskedUint64x8 OpMaxUint64x8 + OpMaxMaskedUint64x8 OpMinUint64x8 + OpMinMaskedUint64x8 OpMulEvenWidenUint64x8 + OpMulEvenWidenMaskedUint64x8 OpNotEqualUint64x8 + OpNotEqualMaskedUint64x8 OpOrUint64x8 + OpOrMaskedUint64x8 OpPopCountUint64x8 + OpPopCountMaskedUint64x8 OpRotateLeftUint64x8 + OpRotateLeftMaskedUint64x8 OpRotateRightUint64x8 + OpRotateRightMaskedUint64x8 OpShiftAllLeftUint64x8 + OpShiftAllLeftMaskedUint64x8 OpShiftAllRightUint64x8 + OpShiftAllRightMaskedUint64x8 OpShiftLeftUint64x8 OpShiftLeftAndFillUpperFromUint64x8 + OpShiftLeftAndFillUpperFromMaskedUint64x8 + OpShiftLeftMaskedUint64x8 OpShiftRightUint64x8 OpShiftRightAndFillUpperFromUint64x8 + OpShiftRightAndFillUpperFromMaskedUint64x8 + OpShiftRightMaskedUint64x8 OpShiftRightSignExtendedUint64x8 + OpShiftRightSignExtendedMaskedUint64x8 OpSubUint64x8 + OpSubMaskedUint64x8 OpXorUint64x8 + OpXorMaskedUint64x8 OpAddUint8x16 + OpAddMaskedUint8x16 OpAndUint8x16 OpAndNotUint8x16 OpAverageUint8x16 + OpAverageMaskedUint8x16 OpEqualUint8x16 + OpEqualMaskedUint8x16 OpGaloisFieldMulUint8x16 + OpGaloisFieldMulMaskedUint8x16 OpGreaterUint8x16 OpGreaterEqualUint8x16 + OpGreaterEqualMaskedUint8x16 + OpGreaterMaskedUint8x16 OpLessUint8x16 OpLessEqualUint8x16 - OpMaskedAddUint8x16 - OpMaskedAverageUint8x16 - OpMaskedEqualUint8x16 - OpMaskedGaloisFieldMulUint8x16 - OpMaskedGreaterUint8x16 - OpMaskedGreaterEqualUint8x16 - OpMaskedLessUint8x16 - OpMaskedLessEqualUint8x16 - OpMaskedMaxUint8x16 - OpMaskedMinUint8x16 - OpMaskedNotEqualUint8x16 - OpMaskedPopCountUint8x16 - OpMaskedSaturatedAddUint8x16 - OpMaskedSaturatedSubUint8x16 - OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16 - OpMaskedSubUint8x16 + OpLessEqualMaskedUint8x16 + OpLessMaskedUint8x16 OpMaxUint8x16 + OpMaxMaskedUint8x16 OpMinUint8x16 + OpMinMaskedUint8x16 OpNotEqualUint8x16 + OpNotEqualMaskedUint8x16 OpOrUint8x16 OpPopCountUint8x16 + OpPopCountMaskedUint8x16 OpSaturatedAddUint8x16 + OpSaturatedAddMaskedUint8x16 OpSaturatedSubUint8x16 + OpSaturatedSubMaskedUint8x16 OpSaturatedUnsignedSignedPairDotProdUint8x16 + OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16 OpSubUint8x16 + OpSubMaskedUint8x16 OpXorUint8x16 OpAddUint8x32 + OpAddMaskedUint8x32 OpAndUint8x32 OpAndNotUint8x32 OpAverageUint8x32 + OpAverageMaskedUint8x32 OpEqualUint8x32 + OpEqualMaskedUint8x32 OpGaloisFieldMulUint8x32 + OpGaloisFieldMulMaskedUint8x32 OpGreaterUint8x32 OpGreaterEqualUint8x32 + OpGreaterEqualMaskedUint8x32 + OpGreaterMaskedUint8x32 OpLessUint8x32 OpLessEqualUint8x32 - 
OpMaskedAddUint8x32 - OpMaskedAverageUint8x32 - OpMaskedEqualUint8x32 - OpMaskedGaloisFieldMulUint8x32 - OpMaskedGreaterUint8x32 - OpMaskedGreaterEqualUint8x32 - OpMaskedLessUint8x32 - OpMaskedLessEqualUint8x32 - OpMaskedMaxUint8x32 - OpMaskedMinUint8x32 - OpMaskedNotEqualUint8x32 - OpMaskedPopCountUint8x32 - OpMaskedSaturatedAddUint8x32 - OpMaskedSaturatedSubUint8x32 - OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32 - OpMaskedSubUint8x32 + OpLessEqualMaskedUint8x32 + OpLessMaskedUint8x32 OpMaxUint8x32 + OpMaxMaskedUint8x32 OpMinUint8x32 + OpMinMaskedUint8x32 OpNotEqualUint8x32 + OpNotEqualMaskedUint8x32 OpOrUint8x32 OpPopCountUint8x32 + OpPopCountMaskedUint8x32 OpSaturatedAddUint8x32 + OpSaturatedAddMaskedUint8x32 OpSaturatedSubUint8x32 + OpSaturatedSubMaskedUint8x32 OpSaturatedUnsignedSignedPairDotProdUint8x32 + OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32 OpSubUint8x32 + OpSubMaskedUint8x32 OpXorUint8x32 OpAddUint8x64 + OpAddMaskedUint8x64 OpAverageUint8x64 + OpAverageMaskedUint8x64 OpEqualUint8x64 + OpEqualMaskedUint8x64 OpGaloisFieldMulUint8x64 + OpGaloisFieldMulMaskedUint8x64 OpGreaterUint8x64 OpGreaterEqualUint8x64 + OpGreaterEqualMaskedUint8x64 + OpGreaterMaskedUint8x64 OpLessUint8x64 OpLessEqualUint8x64 - OpMaskedAddUint8x64 - OpMaskedAverageUint8x64 - OpMaskedEqualUint8x64 - OpMaskedGaloisFieldMulUint8x64 - OpMaskedGreaterUint8x64 - OpMaskedGreaterEqualUint8x64 - OpMaskedLessUint8x64 - OpMaskedLessEqualUint8x64 - OpMaskedMaxUint8x64 - OpMaskedMinUint8x64 - OpMaskedNotEqualUint8x64 - OpMaskedPopCountUint8x64 - OpMaskedSaturatedAddUint8x64 - OpMaskedSaturatedSubUint8x64 - OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64 - OpMaskedSubUint8x64 + OpLessEqualMaskedUint8x64 + OpLessMaskedUint8x64 OpMaxUint8x64 + OpMaxMaskedUint8x64 OpMinUint8x64 + OpMinMaskedUint8x64 OpNotEqualUint8x64 + OpNotEqualMaskedUint8x64 OpPopCountUint8x64 + OpPopCountMaskedUint8x64 OpSaturatedAddUint8x64 + OpSaturatedAddMaskedUint8x64 OpSaturatedSubUint8x64 + OpSaturatedSubMaskedUint8x64 OpSaturatedUnsignedSignedPairDotProdUint8x64 + OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64 OpSubUint8x64 + OpSubMaskedUint8x64 OpCeilWithPrecisionFloat32x16 + OpCeilWithPrecisionMaskedFloat32x16 OpDiffWithCeilWithPrecisionFloat32x16 + OpDiffWithCeilWithPrecisionMaskedFloat32x16 OpDiffWithFloorWithPrecisionFloat32x16 + OpDiffWithFloorWithPrecisionMaskedFloat32x16 OpDiffWithRoundWithPrecisionFloat32x16 + OpDiffWithRoundWithPrecisionMaskedFloat32x16 OpDiffWithTruncWithPrecisionFloat32x16 + OpDiffWithTruncWithPrecisionMaskedFloat32x16 OpFloorWithPrecisionFloat32x16 - OpMaskedCeilWithPrecisionFloat32x16 - OpMaskedDiffWithCeilWithPrecisionFloat32x16 - OpMaskedDiffWithFloorWithPrecisionFloat32x16 - OpMaskedDiffWithRoundWithPrecisionFloat32x16 - OpMaskedDiffWithTruncWithPrecisionFloat32x16 - OpMaskedFloorWithPrecisionFloat32x16 - OpMaskedRoundWithPrecisionFloat32x16 - OpMaskedTruncWithPrecisionFloat32x16 + OpFloorWithPrecisionMaskedFloat32x16 OpRoundWithPrecisionFloat32x16 + OpRoundWithPrecisionMaskedFloat32x16 OpTruncWithPrecisionFloat32x16 + OpTruncWithPrecisionMaskedFloat32x16 OpCeilWithPrecisionFloat32x4 + OpCeilWithPrecisionMaskedFloat32x4 OpDiffWithCeilWithPrecisionFloat32x4 + OpDiffWithCeilWithPrecisionMaskedFloat32x4 OpDiffWithFloorWithPrecisionFloat32x4 + OpDiffWithFloorWithPrecisionMaskedFloat32x4 OpDiffWithRoundWithPrecisionFloat32x4 + OpDiffWithRoundWithPrecisionMaskedFloat32x4 OpDiffWithTruncWithPrecisionFloat32x4 + OpDiffWithTruncWithPrecisionMaskedFloat32x4 OpFloorWithPrecisionFloat32x4 - 
OpMaskedCeilWithPrecisionFloat32x4 - OpMaskedDiffWithCeilWithPrecisionFloat32x4 - OpMaskedDiffWithFloorWithPrecisionFloat32x4 - OpMaskedDiffWithRoundWithPrecisionFloat32x4 - OpMaskedDiffWithTruncWithPrecisionFloat32x4 - OpMaskedFloorWithPrecisionFloat32x4 - OpMaskedRoundWithPrecisionFloat32x4 - OpMaskedTruncWithPrecisionFloat32x4 + OpFloorWithPrecisionMaskedFloat32x4 OpRoundWithPrecisionFloat32x4 + OpRoundWithPrecisionMaskedFloat32x4 OpTruncWithPrecisionFloat32x4 + OpTruncWithPrecisionMaskedFloat32x4 OpCeilWithPrecisionFloat32x8 + OpCeilWithPrecisionMaskedFloat32x8 OpDiffWithCeilWithPrecisionFloat32x8 + OpDiffWithCeilWithPrecisionMaskedFloat32x8 OpDiffWithFloorWithPrecisionFloat32x8 + OpDiffWithFloorWithPrecisionMaskedFloat32x8 OpDiffWithRoundWithPrecisionFloat32x8 + OpDiffWithRoundWithPrecisionMaskedFloat32x8 OpDiffWithTruncWithPrecisionFloat32x8 + OpDiffWithTruncWithPrecisionMaskedFloat32x8 OpFloorWithPrecisionFloat32x8 + OpFloorWithPrecisionMaskedFloat32x8 OpGet128Float32x8 - OpMaskedCeilWithPrecisionFloat32x8 - OpMaskedDiffWithCeilWithPrecisionFloat32x8 - OpMaskedDiffWithFloorWithPrecisionFloat32x8 - OpMaskedDiffWithRoundWithPrecisionFloat32x8 - OpMaskedDiffWithTruncWithPrecisionFloat32x8 - OpMaskedFloorWithPrecisionFloat32x8 - OpMaskedRoundWithPrecisionFloat32x8 - OpMaskedTruncWithPrecisionFloat32x8 OpRoundWithPrecisionFloat32x8 + OpRoundWithPrecisionMaskedFloat32x8 OpSet128Float32x8 OpTruncWithPrecisionFloat32x8 + OpTruncWithPrecisionMaskedFloat32x8 OpCeilWithPrecisionFloat64x2 + OpCeilWithPrecisionMaskedFloat64x2 OpDiffWithCeilWithPrecisionFloat64x2 + OpDiffWithCeilWithPrecisionMaskedFloat64x2 OpDiffWithFloorWithPrecisionFloat64x2 + OpDiffWithFloorWithPrecisionMaskedFloat64x2 OpDiffWithRoundWithPrecisionFloat64x2 + OpDiffWithRoundWithPrecisionMaskedFloat64x2 OpDiffWithTruncWithPrecisionFloat64x2 + OpDiffWithTruncWithPrecisionMaskedFloat64x2 OpFloorWithPrecisionFloat64x2 - OpMaskedCeilWithPrecisionFloat64x2 - OpMaskedDiffWithCeilWithPrecisionFloat64x2 - OpMaskedDiffWithFloorWithPrecisionFloat64x2 - OpMaskedDiffWithRoundWithPrecisionFloat64x2 - OpMaskedDiffWithTruncWithPrecisionFloat64x2 - OpMaskedFloorWithPrecisionFloat64x2 - OpMaskedRoundWithPrecisionFloat64x2 - OpMaskedTruncWithPrecisionFloat64x2 + OpFloorWithPrecisionMaskedFloat64x2 OpRoundWithPrecisionFloat64x2 + OpRoundWithPrecisionMaskedFloat64x2 OpTruncWithPrecisionFloat64x2 + OpTruncWithPrecisionMaskedFloat64x2 OpCeilWithPrecisionFloat64x4 + OpCeilWithPrecisionMaskedFloat64x4 OpDiffWithCeilWithPrecisionFloat64x4 + OpDiffWithCeilWithPrecisionMaskedFloat64x4 OpDiffWithFloorWithPrecisionFloat64x4 + OpDiffWithFloorWithPrecisionMaskedFloat64x4 OpDiffWithRoundWithPrecisionFloat64x4 + OpDiffWithRoundWithPrecisionMaskedFloat64x4 OpDiffWithTruncWithPrecisionFloat64x4 + OpDiffWithTruncWithPrecisionMaskedFloat64x4 OpFloorWithPrecisionFloat64x4 + OpFloorWithPrecisionMaskedFloat64x4 OpGet128Float64x4 - OpMaskedCeilWithPrecisionFloat64x4 - OpMaskedDiffWithCeilWithPrecisionFloat64x4 - OpMaskedDiffWithFloorWithPrecisionFloat64x4 - OpMaskedDiffWithRoundWithPrecisionFloat64x4 - OpMaskedDiffWithTruncWithPrecisionFloat64x4 - OpMaskedFloorWithPrecisionFloat64x4 - OpMaskedRoundWithPrecisionFloat64x4 - OpMaskedTruncWithPrecisionFloat64x4 OpRoundWithPrecisionFloat64x4 + OpRoundWithPrecisionMaskedFloat64x4 OpSet128Float64x4 OpTruncWithPrecisionFloat64x4 + OpTruncWithPrecisionMaskedFloat64x4 OpCeilWithPrecisionFloat64x8 + OpCeilWithPrecisionMaskedFloat64x8 OpDiffWithCeilWithPrecisionFloat64x8 + OpDiffWithCeilWithPrecisionMaskedFloat64x8 
OpDiffWithFloorWithPrecisionFloat64x8 + OpDiffWithFloorWithPrecisionMaskedFloat64x8 OpDiffWithRoundWithPrecisionFloat64x8 + OpDiffWithRoundWithPrecisionMaskedFloat64x8 OpDiffWithTruncWithPrecisionFloat64x8 + OpDiffWithTruncWithPrecisionMaskedFloat64x8 OpFloorWithPrecisionFloat64x8 - OpMaskedCeilWithPrecisionFloat64x8 - OpMaskedDiffWithCeilWithPrecisionFloat64x8 - OpMaskedDiffWithFloorWithPrecisionFloat64x8 - OpMaskedDiffWithRoundWithPrecisionFloat64x8 - OpMaskedDiffWithTruncWithPrecisionFloat64x8 - OpMaskedFloorWithPrecisionFloat64x8 - OpMaskedRoundWithPrecisionFloat64x8 - OpMaskedTruncWithPrecisionFloat64x8 + OpFloorWithPrecisionMaskedFloat64x8 OpRoundWithPrecisionFloat64x8 + OpRoundWithPrecisionMaskedFloat64x8 OpTruncWithPrecisionFloat64x8 + OpTruncWithPrecisionMaskedFloat64x8 OpGet128Int16x16 - OpMaskedShiftAllLeftAndFillUpperFromInt16x16 - OpMaskedShiftAllRightAndFillUpperFromInt16x16 OpSet128Int16x16 OpShiftAllLeftAndFillUpperFromInt16x16 + OpShiftAllLeftAndFillUpperFromMaskedInt16x16 OpShiftAllRightAndFillUpperFromInt16x16 - OpMaskedShiftAllLeftAndFillUpperFromInt16x32 - OpMaskedShiftAllRightAndFillUpperFromInt16x32 + OpShiftAllRightAndFillUpperFromMaskedInt16x16 OpShiftAllLeftAndFillUpperFromInt16x32 + OpShiftAllLeftAndFillUpperFromMaskedInt16x32 OpShiftAllRightAndFillUpperFromInt16x32 + OpShiftAllRightAndFillUpperFromMaskedInt16x32 OpGetElemInt16x8 - OpMaskedShiftAllLeftAndFillUpperFromInt16x8 - OpMaskedShiftAllRightAndFillUpperFromInt16x8 OpSetElemInt16x8 OpShiftAllLeftAndFillUpperFromInt16x8 + OpShiftAllLeftAndFillUpperFromMaskedInt16x8 OpShiftAllRightAndFillUpperFromInt16x8 - OpMaskedRotateAllLeftInt32x16 - OpMaskedRotateAllRightInt32x16 - OpMaskedShiftAllLeftAndFillUpperFromInt32x16 - OpMaskedShiftAllRightAndFillUpperFromInt32x16 + OpShiftAllRightAndFillUpperFromMaskedInt16x8 OpRotateAllLeftInt32x16 + OpRotateAllLeftMaskedInt32x16 OpRotateAllRightInt32x16 + OpRotateAllRightMaskedInt32x16 OpShiftAllLeftAndFillUpperFromInt32x16 + OpShiftAllLeftAndFillUpperFromMaskedInt32x16 OpShiftAllRightAndFillUpperFromInt32x16 + OpShiftAllRightAndFillUpperFromMaskedInt32x16 OpGetElemInt32x4 - OpMaskedRotateAllLeftInt32x4 - OpMaskedRotateAllRightInt32x4 - OpMaskedShiftAllLeftAndFillUpperFromInt32x4 - OpMaskedShiftAllRightAndFillUpperFromInt32x4 OpRotateAllLeftInt32x4 + OpRotateAllLeftMaskedInt32x4 OpRotateAllRightInt32x4 + OpRotateAllRightMaskedInt32x4 OpSetElemInt32x4 OpShiftAllLeftAndFillUpperFromInt32x4 + OpShiftAllLeftAndFillUpperFromMaskedInt32x4 OpShiftAllRightAndFillUpperFromInt32x4 + OpShiftAllRightAndFillUpperFromMaskedInt32x4 OpGet128Int32x8 - OpMaskedRotateAllLeftInt32x8 - OpMaskedRotateAllRightInt32x8 - OpMaskedShiftAllLeftAndFillUpperFromInt32x8 - OpMaskedShiftAllRightAndFillUpperFromInt32x8 OpRotateAllLeftInt32x8 + OpRotateAllLeftMaskedInt32x8 OpRotateAllRightInt32x8 + OpRotateAllRightMaskedInt32x8 OpSet128Int32x8 OpShiftAllLeftAndFillUpperFromInt32x8 + OpShiftAllLeftAndFillUpperFromMaskedInt32x8 OpShiftAllRightAndFillUpperFromInt32x8 + OpShiftAllRightAndFillUpperFromMaskedInt32x8 OpGetElemInt64x2 - OpMaskedRotateAllLeftInt64x2 - OpMaskedRotateAllRightInt64x2 - OpMaskedShiftAllLeftAndFillUpperFromInt64x2 - OpMaskedShiftAllRightAndFillUpperFromInt64x2 OpRotateAllLeftInt64x2 + OpRotateAllLeftMaskedInt64x2 OpRotateAllRightInt64x2 + OpRotateAllRightMaskedInt64x2 OpSetElemInt64x2 OpShiftAllLeftAndFillUpperFromInt64x2 + OpShiftAllLeftAndFillUpperFromMaskedInt64x2 OpShiftAllRightAndFillUpperFromInt64x2 + OpShiftAllRightAndFillUpperFromMaskedInt64x2 OpGet128Int64x4 - 
OpMaskedRotateAllLeftInt64x4 - OpMaskedRotateAllRightInt64x4 - OpMaskedShiftAllLeftAndFillUpperFromInt64x4 - OpMaskedShiftAllRightAndFillUpperFromInt64x4 OpRotateAllLeftInt64x4 + OpRotateAllLeftMaskedInt64x4 OpRotateAllRightInt64x4 + OpRotateAllRightMaskedInt64x4 OpSet128Int64x4 OpShiftAllLeftAndFillUpperFromInt64x4 + OpShiftAllLeftAndFillUpperFromMaskedInt64x4 OpShiftAllRightAndFillUpperFromInt64x4 - OpMaskedRotateAllLeftInt64x8 - OpMaskedRotateAllRightInt64x8 - OpMaskedShiftAllLeftAndFillUpperFromInt64x8 - OpMaskedShiftAllRightAndFillUpperFromInt64x8 + OpShiftAllRightAndFillUpperFromMaskedInt64x4 OpRotateAllLeftInt64x8 + OpRotateAllLeftMaskedInt64x8 OpRotateAllRightInt64x8 + OpRotateAllRightMaskedInt64x8 OpShiftAllLeftAndFillUpperFromInt64x8 + OpShiftAllLeftAndFillUpperFromMaskedInt64x8 OpShiftAllRightAndFillUpperFromInt64x8 + OpShiftAllRightAndFillUpperFromMaskedInt64x8 OpGetElemInt8x16 OpSetElemInt8x16 OpGet128Int8x32 OpSet128Int8x32 OpGet128Uint16x16 - OpMaskedShiftAllLeftAndFillUpperFromUint16x16 - OpMaskedShiftAllRightAndFillUpperFromUint16x16 OpSet128Uint16x16 OpShiftAllLeftAndFillUpperFromUint16x16 + OpShiftAllLeftAndFillUpperFromMaskedUint16x16 OpShiftAllRightAndFillUpperFromUint16x16 - OpMaskedShiftAllLeftAndFillUpperFromUint16x32 - OpMaskedShiftAllRightAndFillUpperFromUint16x32 + OpShiftAllRightAndFillUpperFromMaskedUint16x16 OpShiftAllLeftAndFillUpperFromUint16x32 + OpShiftAllLeftAndFillUpperFromMaskedUint16x32 OpShiftAllRightAndFillUpperFromUint16x32 + OpShiftAllRightAndFillUpperFromMaskedUint16x32 OpGetElemUint16x8 - OpMaskedShiftAllLeftAndFillUpperFromUint16x8 - OpMaskedShiftAllRightAndFillUpperFromUint16x8 OpSetElemUint16x8 OpShiftAllLeftAndFillUpperFromUint16x8 + OpShiftAllLeftAndFillUpperFromMaskedUint16x8 OpShiftAllRightAndFillUpperFromUint16x8 - OpMaskedRotateAllLeftUint32x16 - OpMaskedRotateAllRightUint32x16 - OpMaskedShiftAllLeftAndFillUpperFromUint32x16 - OpMaskedShiftAllRightAndFillUpperFromUint32x16 + OpShiftAllRightAndFillUpperFromMaskedUint16x8 OpRotateAllLeftUint32x16 + OpRotateAllLeftMaskedUint32x16 OpRotateAllRightUint32x16 + OpRotateAllRightMaskedUint32x16 OpShiftAllLeftAndFillUpperFromUint32x16 + OpShiftAllLeftAndFillUpperFromMaskedUint32x16 OpShiftAllRightAndFillUpperFromUint32x16 + OpShiftAllRightAndFillUpperFromMaskedUint32x16 OpGetElemUint32x4 - OpMaskedRotateAllLeftUint32x4 - OpMaskedRotateAllRightUint32x4 - OpMaskedShiftAllLeftAndFillUpperFromUint32x4 - OpMaskedShiftAllRightAndFillUpperFromUint32x4 OpRotateAllLeftUint32x4 + OpRotateAllLeftMaskedUint32x4 OpRotateAllRightUint32x4 + OpRotateAllRightMaskedUint32x4 OpSetElemUint32x4 OpShiftAllLeftAndFillUpperFromUint32x4 + OpShiftAllLeftAndFillUpperFromMaskedUint32x4 OpShiftAllRightAndFillUpperFromUint32x4 + OpShiftAllRightAndFillUpperFromMaskedUint32x4 OpGet128Uint32x8 - OpMaskedRotateAllLeftUint32x8 - OpMaskedRotateAllRightUint32x8 - OpMaskedShiftAllLeftAndFillUpperFromUint32x8 - OpMaskedShiftAllRightAndFillUpperFromUint32x8 OpRotateAllLeftUint32x8 + OpRotateAllLeftMaskedUint32x8 OpRotateAllRightUint32x8 + OpRotateAllRightMaskedUint32x8 OpSet128Uint32x8 OpShiftAllLeftAndFillUpperFromUint32x8 + OpShiftAllLeftAndFillUpperFromMaskedUint32x8 OpShiftAllRightAndFillUpperFromUint32x8 + OpShiftAllRightAndFillUpperFromMaskedUint32x8 OpGetElemUint64x2 - OpMaskedRotateAllLeftUint64x2 - OpMaskedRotateAllRightUint64x2 - OpMaskedShiftAllLeftAndFillUpperFromUint64x2 - OpMaskedShiftAllRightAndFillUpperFromUint64x2 OpRotateAllLeftUint64x2 + OpRotateAllLeftMaskedUint64x2 OpRotateAllRightUint64x2 + 
OpRotateAllRightMaskedUint64x2 OpSetElemUint64x2 OpShiftAllLeftAndFillUpperFromUint64x2 + OpShiftAllLeftAndFillUpperFromMaskedUint64x2 OpShiftAllRightAndFillUpperFromUint64x2 + OpShiftAllRightAndFillUpperFromMaskedUint64x2 OpGet128Uint64x4 - OpMaskedRotateAllLeftUint64x4 - OpMaskedRotateAllRightUint64x4 - OpMaskedShiftAllLeftAndFillUpperFromUint64x4 - OpMaskedShiftAllRightAndFillUpperFromUint64x4 OpRotateAllLeftUint64x4 + OpRotateAllLeftMaskedUint64x4 OpRotateAllRightUint64x4 + OpRotateAllRightMaskedUint64x4 OpSet128Uint64x4 OpShiftAllLeftAndFillUpperFromUint64x4 + OpShiftAllLeftAndFillUpperFromMaskedUint64x4 OpShiftAllRightAndFillUpperFromUint64x4 - OpMaskedRotateAllLeftUint64x8 - OpMaskedRotateAllRightUint64x8 - OpMaskedShiftAllLeftAndFillUpperFromUint64x8 - OpMaskedShiftAllRightAndFillUpperFromUint64x8 + OpShiftAllRightAndFillUpperFromMaskedUint64x4 OpRotateAllLeftUint64x8 + OpRotateAllLeftMaskedUint64x8 OpRotateAllRightUint64x8 + OpRotateAllRightMaskedUint64x8 OpShiftAllLeftAndFillUpperFromUint64x8 + OpShiftAllLeftAndFillUpperFromMaskedUint64x8 OpShiftAllRightAndFillUpperFromUint64x8 + OpShiftAllRightAndFillUpperFromMaskedUint64x8 OpGaloisFieldAffineTransformUint8x16 OpGaloisFieldAffineTransformInversedUint8x16 + OpGaloisFieldAffineTransformInversedMaskedUint8x16 + OpGaloisFieldAffineTransformMaskedUint8x16 OpGetElemUint8x16 - OpMaskedGaloisFieldAffineTransformUint8x16 - OpMaskedGaloisFieldAffineTransformInversedUint8x16 OpSetElemUint8x16 OpGaloisFieldAffineTransformUint8x32 OpGaloisFieldAffineTransformInversedUint8x32 + OpGaloisFieldAffineTransformInversedMaskedUint8x32 + OpGaloisFieldAffineTransformMaskedUint8x32 OpGet128Uint8x32 - OpMaskedGaloisFieldAffineTransformUint8x32 - OpMaskedGaloisFieldAffineTransformInversedUint8x32 OpSet128Uint8x32 OpGaloisFieldAffineTransformUint8x64 OpGaloisFieldAffineTransformInversedUint8x64 - OpMaskedGaloisFieldAffineTransformUint8x64 - OpMaskedGaloisFieldAffineTransformInversedUint8x64 + OpGaloisFieldAffineTransformInversedMaskedUint8x64 + OpGaloisFieldAffineTransformMaskedUint8x64 ) var opcodeTable = [...]opInfo{ @@ -18580,12 +18580,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PS512", - argLen: 1, - asm: x86.AVRCP14PS, + name: "VADDPSMasked512", + argLen: 3, + commutative: true, + asm: x86.AVADDPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18593,9 +18596,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRT14PS512", + name: "VRCP14PS512", argLen: 1, - asm: x86.AVRSQRT14PS, + asm: x86.AVRCP14PS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18606,13 +18609,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPS512", + name: "VRCP14PSMasked512", argLen: 2, - asm: x86.AVDIVPS, + asm: x86.AVRCP14PS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18620,15 +18623,12 @@ var 
opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PS512", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADD213PS, + name: "VRSQRT14PS512", + argLen: 1, + asm: x86.AVRSQRT14PS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18636,15 +18636,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PS512", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADDSUB213PS, + name: "VRSQRT14PSMasked512", + argLen: 2, + asm: x86.AVRSQRT14PS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18652,15 +18650,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PS512", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMSUBADD213PS, + name: "VDIVPS512", + argLen: 2, + asm: x86.AVDIVPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18668,10 +18664,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPSMasked512", - argLen: 3, - commutative: true, - asm: x86.AVADDPS, + name: "VDIVPSMasked512", + argLen: 3, + asm: x86.AVDIVPS, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -18684,13 +18679,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PSMasked512", - argLen: 2, - asm: x86.AVRCP14PS, + name: "VFMADD213PS512", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMADD213PS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18698,13 +18695,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRT14PSMasked512", - argLen: 2, - asm: x86.AVRSQRT14PS, + name: "VFMADD213PSMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMADD213PS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18712,14 +18712,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPSMasked512", - argLen: 3, - asm: x86.AVDIVPS, + name: "VFMADDSUB213PS512", + 
argLen: 3, + resultInArg0: true, + asm: x86.AVFMADDSUB213PS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18727,10 +18728,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PSMasked512", + name: "VFMADDSUB213PSMasked512", argLen: 4, resultInArg0: true, - asm: x86.AVFMADD213PS, + asm: x86.AVFMADDSUB213PS, reg: regInfo{ inputs: []inputInfo{ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -18744,16 +18745,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PSMasked512", - argLen: 4, + name: "VFMSUBADD213PS512", + argLen: 3, resultInArg0: true, - asm: x86.AVFMADDSUB213PS, + asm: x86.AVFMSUBADD213PS, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18777,6 +18777,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VMAXPS512", + argLen: 2, + commutative: true, + asm: x86.AVMAXPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VMAXPSMasked512", argLen: 3, @@ -18794,15 +18809,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPSMasked512", - argLen: 3, + name: "VMINPS512", + argLen: 2, commutative: true, asm: x86.AVMINPS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18810,10 +18824,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMULPSMasked512", + name: "VMINPSMasked512", argLen: 3, commutative: true, - asm: x86.AVMULPS, + asm: x86.AVMINPS, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -18826,14 +18840,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPSMasked512", - argLen: 3, - asm: x86.AVSCALEFPS, + name: "VMULPS512", + argLen: 2, + commutative: true, + asm: x86.AVMULPS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18841,13 +18855,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPSMasked512", + name: "VSCALEFPS512", argLen: 2, - asm: x86.AVSQRTPS, + asm: x86.AVSCALEFPS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18855,9 +18869,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPSMasked512", + name: "VSCALEFPSMasked512", argLen: 3, - asm: x86.AVSUBPS, + asm: x86.AVSCALEFPS, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -18870,14 +18884,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPS512", - argLen: 2, + name: "VMULPSMasked512", + argLen: 3, commutative: true, - asm: x86.AVMAXPS, + asm: x86.AVMULPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18885,14 +18900,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPS512", - argLen: 2, - commutative: true, - asm: x86.AVMINPS, + name: "VSQRTPS512", + argLen: 1, + asm: x86.AVSQRTPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18900,14 +18913,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMULPS512", - argLen: 2, - commutative: true, - asm: x86.AVMULPS, + name: "VSQRTPSMasked512", + argLen: 2, + asm: x86.AVSQRTPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18915,9 +18927,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPS512", + name: "VSUBPS512", argLen: 2, - asm: x86.AVSCALEFPS, + asm: x86.AVSUBPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18929,12 +18941,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPS512", - argLen: 1, - asm: x86.AVSQRTPS, + name: "VSUBPSMasked512", + argLen: 3, + asm: x86.AVSUBPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: 
[]outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18942,9 +18956,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPS512", - argLen: 2, - asm: x86.AVSUBPS, + name: "VADDPS128", + argLen: 2, + commutative: true, + asm: x86.AVADDPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18956,14 +18971,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPS128", - argLen: 2, + name: "VADDPSMasked128", + argLen: 3, commutative: true, asm: x86.AVADDPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -18998,26 +19014,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRTPS128", - argLen: 1, - asm: x86.AVRSQRTPS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VDIVPS128", + name: "VRCP14PSMasked128", argLen: 2, - asm: x86.AVDIVPS, + asm: x86.AVRCP14PS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19025,15 +19028,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PS128", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADD213PS, + name: "VRSQRTPS128", + argLen: 1, + asm: x86.AVRSQRTPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19041,15 +19041,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PS128", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADDSUB213PS, + name: "VRSQRT14PSMasked128", + argLen: 2, + asm: x86.AVRSQRT14PS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19057,15 +19055,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PS128", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMSUBADD213PS, + name: "VDIVPS128", + argLen: 2, + asm: x86.AVDIVPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 
X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19073,10 +19069,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPSMasked128", - argLen: 3, - commutative: true, - asm: x86.AVADDPS, + name: "VDIVPSMasked128", + argLen: 3, + asm: x86.AVDIVPS, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -19089,42 +19084,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PSMasked128", - argLen: 2, - asm: x86.AVRCP14PS, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VRSQRT14PSMasked128", - argLen: 2, - asm: x86.AVRSQRT14PS, + name: "VFMADD213PS128", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMADD213PS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VDIVPSMasked128", - argLen: 3, - asm: x86.AVDIVPS, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19149,16 +19117,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PSMasked128", - argLen: 4, + name: "VFMADDSUB213PS128", + argLen: 3, resultInArg0: true, asm: x86.AVFMADDSUB213PS, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19166,10 +19133,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PSMasked128", + name: "VFMADDSUB213PSMasked128", argLen: 4, resultInArg0: true, - asm: x86.AVFMSUBADD213PS, + asm: x86.AVFMADDSUB213PS, reg: regInfo{ inputs: []inputInfo{ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -19183,15 +19150,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPSMasked128", - argLen: 3, - commutative: true, - asm: x86.AVMAXPS, + name: "VFMSUBADD213PS128", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMSUBADD213PS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
}, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19199,15 +19166,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPSMasked128", - argLen: 3, - commutative: true, - asm: x86.AVMINPS, + name: "VFMSUBADD213PSMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMSUBADD213PS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19215,15 +19183,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMULPSMasked128", - argLen: 3, + name: "VMAXPS128", + argLen: 2, commutative: true, - asm: x86.AVMULPS, + asm: x86.AVMAXPS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19231,9 +19198,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPSMasked128", - argLen: 3, - asm: x86.AVSCALEFPS, + name: "VMAXPSMasked128", + argLen: 3, + commutative: true, + asm: x86.AVMAXPS, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -19246,13 +19214,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPSMasked128", - argLen: 2, - asm: x86.AVSQRTPS, + name: "VMINPS128", + argLen: 2, + commutative: true, + asm: x86.AVMINPS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19260,9 +19229,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPSMasked128", - argLen: 3, - asm: x86.AVSUBPS, + name: "VMINPSMasked128", + argLen: 3, + commutative: true, + asm: x86.AVMINPS, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -19275,10 +19245,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPS128", + name: "VMULPS128", argLen: 2, commutative: true, - asm: x86.AVMAXPS, + asm: x86.AVMULPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19290,10 +19260,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPS128", - argLen: 2, - commutative: true, - asm: x86.AVMINPS, + name: "VSCALEFPS128", + argLen: 2, + asm: x86.AVSCALEFPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19305,14 +19274,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMULPS128", - argLen: 2, - commutative: true, - asm: x86.AVMULPS, + name: "VSCALEFPSMasked128", + argLen: 3, + asm: x86.AVSCALEFPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 
X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19320,13 +19289,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPS128", - argLen: 2, - asm: x86.AVSCALEFPS, + name: "VMULPSMasked128", + argLen: 3, + commutative: true, + asm: x86.AVMULPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19375,13 +19346,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPS128", + name: "VSQRTPSMasked128", argLen: 2, - asm: x86.AVSUBPS, + asm: x86.AVSQRTPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19389,10 +19360,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPS256", - argLen: 2, - commutative: true, - asm: x86.AVADDPS, + name: "VSUBPS128", + argLen: 2, + asm: x86.AVSUBPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19404,13 +19374,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDSUBPS256", - argLen: 2, - asm: x86.AVADDSUBPS, + name: "VSUBPSMasked128", + argLen: 3, + asm: x86.AVSUBPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19418,12 +19389,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PS256", - argLen: 1, - asm: x86.AVRCP14PS, + name: "VADDPS256", + argLen: 2, + commutative: true, + asm: x86.AVADDPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19431,12 +19404,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRTPS256", - argLen: 1, - asm: x86.AVRSQRTPS, + name: "VADDPSMasked256", + argLen: 3, + commutative: true, + asm: x86.AVADDPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
@@ -19444,9 +19420,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPS256", + name: "VADDSUBPS256", argLen: 2, - asm: x86.AVDIVPS, + asm: x86.AVADDSUBPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19458,15 +19434,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PS256", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADD213PS, + name: "VRCP14PS256", + argLen: 1, + asm: x86.AVRCP14PS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19474,15 +19447,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PS256", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADDSUB213PS, + name: "VRCP14PSMasked256", + argLen: 2, + asm: x86.AVRCP14PS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19490,15 +19461,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PS256", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMSUBADD213PS, + name: "VRSQRTPS256", + argLen: 1, + asm: x86.AVRSQRTPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19506,15 +19474,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPSMasked256", - argLen: 3, - commutative: true, - asm: x86.AVADDPS, + name: "VRSQRT14PSMasked256", + argLen: 2, + asm: x86.AVRSQRT14PS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19522,13 +19488,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PSMasked256", + name: "VDIVPS256", argLen: 2, - asm: x86.AVRCP14PS, + asm: x86.AVDIVPS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19536,13 +19502,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRT14PSMasked256", - argLen: 2, - asm: x86.AVRSQRT14PS, + name: "VDIVPSMasked256", + argLen: 3, + asm: x86.AVDIVPS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 
X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19550,14 +19517,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPSMasked256", - argLen: 3, - asm: x86.AVDIVPS, + name: "VFMADD213PS256", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMADD213PS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19582,16 +19550,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PSMasked256", - argLen: 4, + name: "VFMADDSUB213PS256", + argLen: 3, resultInArg0: true, asm: x86.AVFMADDSUB213PS, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19599,10 +19566,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PSMasked256", + name: "VFMADDSUB213PSMasked256", argLen: 4, resultInArg0: true, - asm: x86.AVFMSUBADD213PS, + asm: x86.AVFMADDSUB213PS, reg: regInfo{ inputs: []inputInfo{ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -19616,15 +19583,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPSMasked256", - argLen: 3, - commutative: true, - asm: x86.AVMAXPS, + name: "VFMSUBADD213PS256", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMSUBADD213PS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19632,15 +19599,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPSMasked256", - argLen: 3, - commutative: true, - asm: x86.AVMINPS, + name: "VFMSUBADD213PSMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMSUBADD213PS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19648,15 +19616,14 @@ var 
opcodeTable = [...]opInfo{ }, }, { - name: "VMULPSMasked256", - argLen: 3, + name: "VMAXPS256", + argLen: 2, commutative: true, - asm: x86.AVMULPS, + asm: x86.AVMAXPS, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19664,9 +19631,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPSMasked256", - argLen: 3, - asm: x86.AVSCALEFPS, + name: "VMAXPSMasked256", + argLen: 3, + commutative: true, + asm: x86.AVMAXPS, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -19679,13 +19647,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPSMasked256", - argLen: 2, - asm: x86.AVSQRTPS, + name: "VMINPS256", + argLen: 2, + commutative: true, + asm: x86.AVMINPS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19693,9 +19662,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPSMasked256", - argLen: 3, - asm: x86.AVSUBPS, + name: "VMINPSMasked256", + argLen: 3, + commutative: true, + asm: x86.AVMINPS, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -19708,10 +19678,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPS256", + name: "VMULPS256", argLen: 2, commutative: true, - asm: x86.AVMAXPS, + asm: x86.AVMULPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19723,10 +19693,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPS256", - argLen: 2, - commutative: true, - asm: x86.AVMINPS, + name: "VSCALEFPS256", + argLen: 2, + asm: x86.AVSCALEFPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19738,14 +19707,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMULPS256", - argLen: 2, - commutative: true, - asm: x86.AVMULPS, + name: "VSCALEFPSMasked256", + argLen: 3, + asm: x86.AVSCALEFPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19753,13 +19722,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPS256", - argLen: 2, - asm: x86.AVSCALEFPS, + name: "VMULPSMasked256", + argLen: 3, + commutative: true, + asm: x86.AVMULPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19808,13 +19779,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPS256", + name: "VSQRTPSMasked256", argLen: 2, - asm: x86.AVSUBPS, + asm: x86.AVSQRTPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19822,10 +19793,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPD128", - argLen: 2, - commutative: true, - asm: x86.AVADDPD, + name: "VSUBPS256", + argLen: 2, + asm: x86.AVSUBPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19837,13 +19807,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDSUBPD128", - argLen: 2, - asm: x86.AVADDSUBPD, + name: "VSUBPSMasked256", + argLen: 3, + asm: x86.AVSUBPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19851,12 +19822,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PD128", - argLen: 1, - asm: x86.AVRCP14PD, + name: "VADDPD128", + argLen: 2, + commutative: true, + asm: x86.AVADDPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19864,12 +19837,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRT14PD128", - argLen: 1, - asm: x86.AVRSQRT14PD, + name: "VADDPDMasked128", + argLen: 3, + commutative: true, + asm: x86.AVADDPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19877,9 +19853,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPD128", + name: "VADDSUBPD128", argLen: 2, - asm: x86.AVDIVPD, + asm: x86.AVADDSUBPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19891,15 +19867,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PD128", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADD213PD, + name: "VRCP14PD128", + argLen: 1, + asm: x86.AVRCP14PD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 
X9 X10 X11 X12 X13 X14 @@ -19907,15 +19880,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PD128", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADDSUB213PD, + name: "VRCP14PDMasked128", + argLen: 2, + asm: x86.AVRCP14PD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19923,31 +19894,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PD128", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMSUBADD213PD, + name: "VRSQRT14PD128", + argLen: 1, + asm: x86.AVRSQRT14PD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VADDPDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVADDPD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19955,9 +19907,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PDMasked128", + name: "VRSQRT14PDMasked128", argLen: 2, - asm: x86.AVRCP14PD, + asm: x86.AVRSQRT14PD, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -19969,13 +19921,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRT14PDMasked128", + name: "VDIVPD128", argLen: 2, - asm: x86.AVRSQRT14PD, + asm: x86.AVDIVPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -19998,16 +19950,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PDMasked128", - argLen: 4, + name: "VFMADD213PD128", + argLen: 3, resultInArg0: true, asm: x86.AVFMADD213PD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20015,10 +19966,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PDMasked128", + name: "VFMADD213PDMasked128", argLen: 4, resultInArg0: true, - asm: x86.AVFMADDSUB213PD, + asm: x86.AVFMADD213PD, reg: 
regInfo{ inputs: []inputInfo{ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -20032,32 +19983,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PDMasked128", - argLen: 4, + name: "VFMADDSUB213PD128", + argLen: 3, resultInArg0: true, - asm: x86.AVFMSUBADD213PD, + asm: x86.AVFMADDSUB213PD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VMAXPDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVMAXPD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20065,15 +19999,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVMINPD, + name: "VFMADDSUB213PDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMADDSUB213PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20081,15 +20016,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMULPDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVMULPD, + name: "VFMSUBADD213PD128", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMSUBADD213PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20097,14 +20032,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPDMasked128", - argLen: 3, - asm: x86.AVSCALEFPD, + name: "VFMSUBADD213PDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMSUBADD213PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20112,13 +20049,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPDMasked128", - argLen: 2, - asm: x86.AVSQRTPD, + name: "VMAXPD128", + argLen: 2, + 
commutative: true, + asm: x86.AVMAXPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20126,9 +20064,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPDMasked128", - argLen: 3, - asm: x86.AVSUBPD, + name: "VMAXPDMasked128", + argLen: 3, + commutative: true, + asm: x86.AVMAXPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -20141,10 +20080,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPD128", + name: "VMINPD128", argLen: 2, commutative: true, - asm: x86.AVMAXPD, + asm: x86.AVMINPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20156,14 +20095,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPD128", - argLen: 2, + name: "VMINPDMasked128", + argLen: 3, commutative: true, asm: x86.AVMINPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20200,13 +20140,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VHADDPD128", - argLen: 2, - asm: x86.AVHADDPD, + name: "VSCALEFPDMasked128", + argLen: 3, + asm: x86.AVSCALEFPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20214,13 +20155,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VHSUBPD128", - argLen: 2, - asm: x86.AVHSUBPD, + name: "VMULPDMasked128", + argLen: 3, + commutative: true, + asm: x86.AVMULPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20228,12 +20171,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPD128", - argLen: 1, - asm: x86.AVSQRTPD, + name: "VHADDPD128", + argLen: 2, + asm: x86.AVHADDPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20241,9 +20185,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPD128", + name: "VHSUBPD128", argLen: 2, - asm: x86.AVSUBPD, + asm: 
x86.AVHSUBPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20255,14 +20199,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPD256", - argLen: 2, - commutative: true, - asm: x86.AVADDPD, + name: "VSQRTPD128", + argLen: 1, + asm: x86.AVSQRTPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20270,13 +20212,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDSUBPD256", + name: "VSQRTPDMasked128", argLen: 2, - asm: x86.AVADDSUBPD, + asm: x86.AVSQRTPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20284,12 +20226,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PD256", - argLen: 1, - asm: x86.AVRCP14PD, + name: "VSUBPD128", + argLen: 2, + asm: x86.AVSUBPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20297,12 +20240,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRT14PD256", - argLen: 1, - asm: x86.AVRSQRT14PD, + name: "VSUBPDMasked128", + argLen: 3, + asm: x86.AVSUBPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20310,9 +20255,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPD256", - argLen: 2, - asm: x86.AVDIVPD, + name: "VADDPD256", + argLen: 2, + commutative: true, + asm: x86.AVADDPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20324,15 +20270,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PD256", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADD213PD, + name: "VADDPDMasked256", + argLen: 3, + commutative: true, + asm: x86.AVADDPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20340,15 +20286,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PD256", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADDSUB213PD, + name: "VADDSUBPD256", + argLen: 2, + asm: x86.AVADDSUBPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20356,15 +20300,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PD256", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMSUBADD213PD, + name: "VRCP14PD256", + argLen: 1, + asm: x86.AVRCP14PD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20372,15 +20313,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVADDPD, + name: "VRCP14PDMasked256", + argLen: 2, + asm: x86.AVRCP14PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20388,13 +20327,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PDMasked256", - argLen: 2, - asm: x86.AVRCP14PD, + name: "VRSQRT14PD256", + argLen: 1, + asm: x86.AVRSQRT14PD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20416,14 +20354,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPDMasked256", - argLen: 3, + name: "VDIVPD256", + argLen: 2, asm: x86.AVDIVPD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20431,16 +20368,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PDMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVFMADD213PD, + name: "VDIVPDMasked256", + argLen: 3, + asm: x86.AVDIVPD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20448,16 +20383,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PDMasked256", - argLen: 4, + name: "VFMADD213PD256", + argLen: 3, resultInArg0: true, - asm: x86.AVFMADDSUB213PD, + asm: x86.AVFMADD213PD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20465,10 +20399,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PDMasked256", + name: "VFMADD213PDMasked256", argLen: 4, resultInArg0: true, - asm: x86.AVFMSUBADD213PD, + asm: x86.AVFMADD213PD, reg: regInfo{ inputs: []inputInfo{ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -20482,15 +20416,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVMAXPD, + name: "VFMADDSUB213PD256", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMADDSUB213PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20498,15 +20432,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVMINPD, + name: "VFMADDSUB213PDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMADDSUB213PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20514,15 +20449,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMULPDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVMULPD, + name: "VFMSUBADD213PD256", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMSUBADD213PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20530,14 +20465,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPDMasked256", - argLen: 3, - asm: x86.AVSCALEFPD, + name: "VFMSUBADD213PDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMSUBADD213PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, 
outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20545,13 +20482,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPDMasked256", - argLen: 2, - asm: x86.AVSQRTPD, + name: "VMAXPD256", + argLen: 2, + commutative: true, + asm: x86.AVMAXPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20559,9 +20497,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPDMasked256", - argLen: 3, - asm: x86.AVSUBPD, + name: "VMAXPDMasked256", + argLen: 3, + commutative: true, + asm: x86.AVMAXPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -20574,10 +20513,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPD256", + name: "VMINPD256", argLen: 2, commutative: true, - asm: x86.AVMAXPD, + asm: x86.AVMINPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20589,14 +20528,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMINPD256", - argLen: 2, + name: "VMINPDMasked256", + argLen: 3, commutative: true, asm: x86.AVMINPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20633,27 +20573,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VHADDPD256", - argLen: 2, - asm: x86.AVHADDPD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VHSUBPD256", - argLen: 2, - asm: x86.AVHSUBPD, + name: "VSCALEFPDMasked256", + argLen: 3, + asm: x86.AVSCALEFPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20661,12 +20588,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPD256", - argLen: 1, - asm: x86.AVSQRTPD, + name: "VMULPDMasked256", + argLen: 3, + commutative: true, + asm: x86.AVMULPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20674,9 +20604,9 @@ var opcodeTable = 
[...]opInfo{ }, }, { - name: "VSUBPD256", + name: "VHADDPD256", argLen: 2, - asm: x86.AVSUBPD, + asm: x86.AVHADDPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20688,10 +20618,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VADDPD512", - argLen: 2, - commutative: true, - asm: x86.AVADDPD, + name: "VHSUBPD256", + argLen: 2, + asm: x86.AVHSUBPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20703,22 +20632,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PD512", - argLen: 1, - asm: x86.AVRCP14PD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VRSQRT14PD512", + name: "VSQRTPD256", argLen: 1, - asm: x86.AVRSQRT14PD, + asm: x86.AVSQRTPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20729,13 +20645,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPD512", + name: "VSQRTPDMasked256", argLen: 2, - asm: x86.AVDIVPD, + asm: x86.AVSQRTPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20743,15 +20659,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADD213PD512", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADD213PD, + name: "VSUBPD256", + argLen: 2, + asm: x86.AVSUBPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20759,15 +20673,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMADDSUB213PD512", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMADDSUB213PD, + name: "VSUBPDMasked256", + argLen: 3, + asm: x86.AVSUBPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20775,15 +20688,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VFMSUBADD213PD512", - argLen: 3, - resultInArg0: true, - asm: x86.AVFMSUBADD213PD, + name: "VADDPD512", + argLen: 2, + commutative: true, + asm: x86.AVADDPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20807,13 +20719,12 @@ var 
opcodeTable = [...]opInfo{ }, }, { - name: "VRCP14PDMasked512", - argLen: 2, + name: "VRCP14PD512", + argLen: 1, asm: x86.AVRCP14PD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20821,9 +20732,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRSQRT14PDMasked512", + name: "VRCP14PDMasked512", argLen: 2, - asm: x86.AVRSQRT14PD, + asm: x86.AVRCP14PD, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -20835,128 +20746,26 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDIVPDMasked512", - argLen: 3, - asm: x86.AVDIVPD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VFMADD213PDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVFMADD213PD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VFMADDSUB213PDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVFMADDSUB213PD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VFMSUBADD213PDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVFMSUBADD213PD, - reg: regInfo{ - inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VMAXPDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVMAXPD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VMINPDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVMINPD, + name: "VRSQRT14PD512", + argLen: 1, + asm: x86.AVRSQRT14PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - 
outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, - }, - }, - { - name: "VMULPDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVMULPD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VSCALEFPDMasked512", - argLen: 3, - asm: x86.AVSCALEFPD, + name: "VRSQRT14PDMasked512", + argLen: 2, + asm: x86.AVRSQRT14PD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20964,13 +20773,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPDMasked512", + name: "VDIVPD512", argLen: 2, - asm: x86.AVSQRTPD, + asm: x86.AVDIVPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -20978,9 +20787,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPDMasked512", + name: "VDIVPDMasked512", argLen: 3, - asm: x86.AVSUBPD, + asm: x86.AVDIVPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -20993,29 +20802,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMAXPD512", - argLen: 2, - commutative: true, - asm: x86.AVMAXPD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VMINPD512", - argLen: 2, - commutative: true, - asm: x86.AVMINPD, + name: "VFMADD213PD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMADD213PD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21023,14 +20818,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VMULPD512", - argLen: 2, - commutative: true, - asm: x86.AVMULPD, + name: "VFMADD213PDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMADD213PD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ 
-21038,13 +20835,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSCALEFPD512", - argLen: 2, - asm: x86.AVSCALEFPD, + name: "VFMADDSUB213PD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMADDSUB213PD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21052,12 +20851,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSQRTPD512", - argLen: 1, - asm: x86.AVSQRTPD, + name: "VFMADDSUB213PDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMADDSUB213PD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21065,13 +20868,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VSUBPD512", - argLen: 2, - asm: x86.AVSUBPD, + name: "VFMSUBADD213PD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVFMSUBADD213PD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21079,12 +20884,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSW256", - argLen: 1, - asm: x86.AVPABSW, + name: "VFMSUBADD213PDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVFMSUBADD213PD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21092,10 +20901,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDW256", + name: "VMAXPD512", argLen: 2, commutative: true, - asm: x86.AVPADDW, + asm: x86.AVMAXPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21107,14 +20916,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPEQW256", - argLen: 2, + name: "VMAXPDMasked512", + argLen: 3, commutative: true, - asm: x86.AVPCMPEQW, + asm: x86.AVMAXPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21122,9 +20932,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPGTW256", - argLen: 2, - asm: x86.AVPCMPGTW, + 
name: "VMINPD512", + argLen: 2, + commutative: true, + asm: x86.AVMINPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21136,24 +20947,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSWMasked256", - argLen: 2, - asm: x86.AVPABSW, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPADDWMasked256", + name: "VMINPDMasked512", argLen: 3, commutative: true, - asm: x86.AVPADDW, + asm: x86.AVMINPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21166,15 +20963,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSWMasked256", - argLen: 3, + name: "VMULPD512", + argLen: 2, commutative: true, - asm: x86.AVPMAXSW, + asm: x86.AVMULPD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21182,15 +20978,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSWMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPMINSW, + name: "VSCALEFPD512", + argLen: 2, + asm: x86.AVSCALEFPD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21198,10 +20992,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHWMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPMULHW, + name: "VSCALEFPDMasked512", + argLen: 3, + asm: x86.AVSCALEFPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21214,10 +21007,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLWMasked256", + name: "VMULPDMasked512", argLen: 3, commutative: true, - asm: x86.AVPMULLW, + asm: x86.AVMULPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21230,14 +21023,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMADDWDMasked256", - argLen: 3, - asm: x86.AVPMADDWD, + name: "VSQRTPD512", + argLen: 1, + asm: x86.AVSQRTPD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21245,9 +21036,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTWMasked256", + name: "VSQRTPDMasked512", argLen: 2, - asm: x86.AVPOPCNTW, + asm: x86.AVSQRTPD, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21259,15 +21050,13 @@ var 
opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSWMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPADDSW, + name: "VSUBPD512", + argLen: 2, + asm: x86.AVSUBPD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21275,9 +21064,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSWMasked256", + name: "VSUBPDMasked512", argLen: 3, - asm: x86.AVPSUBSW, + asm: x86.AVSUBPD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21290,14 +21079,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVWMasked256", - argLen: 3, - asm: x86.AVPSLLVW, + name: "VPABSW256", + argLen: 1, + asm: x86.AVPABSW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21305,16 +21092,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVWMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVW, + name: "VPABSWMasked256", + argLen: 2, + asm: x86.AVPABSW, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21322,14 +21106,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVWMasked256", - argLen: 3, - asm: x86.AVPSRLVW, + name: "VPADDW256", + argLen: 2, + commutative: true, + asm: x86.AVPADDW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21337,16 +21121,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVWMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVW, + name: "VPADDWMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPADDW, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21354,14 +21137,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVWMasked256", - argLen: 3, - asm: 
x86.AVPSRAVW, + name: "VPCMPEQW256", + argLen: 2, + commutative: true, + asm: x86.AVPCMPEQW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21369,14 +21152,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBWMasked256", - argLen: 3, - asm: x86.AVPSUBW, + name: "VPCMPGTW256", + argLen: 2, + asm: x86.AVPCMPGTW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21399,14 +21181,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSW256", - argLen: 2, + name: "VPMAXSWMasked256", + argLen: 3, commutative: true, - asm: x86.AVPMINSW, + asm: x86.AVPMAXSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21414,10 +21197,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHW256", + name: "VPMINSW256", argLen: 2, commutative: true, - asm: x86.AVPMULHW, + asm: x86.AVPMINSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21429,14 +21212,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLW256", - argLen: 2, + name: "VPMINSWMasked256", + argLen: 3, commutative: true, - asm: x86.AVPMULLW, + asm: x86.AVPMINSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21444,9 +21228,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMADDWD256", - argLen: 2, - asm: x86.AVPMADDWD, + name: "VPMULHW256", + argLen: 2, + commutative: true, + asm: x86.AVPMULHW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21458,13 +21243,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPHADDW256", - argLen: 2, - asm: x86.AVPHADDW, + name: "VPMULHWMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPMULHW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 
K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21472,9 +21259,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPHSUBW256", - argLen: 2, - asm: x86.AVPHSUBW, + name: "VPMULLW256", + argLen: 2, + commutative: true, + asm: x86.AVPMULLW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21486,12 +21274,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTW256", - argLen: 1, - asm: x86.AVPOPCNTW, + name: "VPMULLWMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPMULLW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21499,10 +21290,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSW256", - argLen: 2, - commutative: true, - asm: x86.AVPADDSW, + name: "VPMADDWD256", + argLen: 2, + asm: x86.AVPMADDWD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21514,13 +21304,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPHADDSW256", - argLen: 2, - asm: x86.AVPHADDSW, + name: "VPMADDWDMasked256", + argLen: 3, + asm: x86.AVPMADDWD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21528,9 +21319,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPHSUBSW256", + name: "VPHADDW256", argLen: 2, - asm: x86.AVPHSUBSW, + asm: x86.AVPHADDW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21542,9 +21333,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSW256", + name: "VPHSUBW256", argLen: 2, - asm: x86.AVPSUBSW, + asm: x86.AVPHSUBW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21556,13 +21347,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLW256", - argLen: 2, - asm: x86.AVPSLLW, + name: "VPOPCNTW256", + argLen: 1, + asm: x86.AVPOPCNTW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21570,13 +21360,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLW256", + name: "VPOPCNTWMasked256", argLen: 2, - asm: x86.AVPSRLW, + asm: x86.AVPOPCNTW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, 
outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21584,9 +21374,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAW256", - argLen: 2, - asm: x86.AVPSRAW, + name: "VPADDSW256", + argLen: 2, + commutative: true, + asm: x86.AVPADDSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21598,13 +21389,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVW256", - argLen: 2, - asm: x86.AVPSLLVW, + name: "VPADDSWMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPADDSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21612,15 +21405,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVW256", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHLDVW, + name: "VPHADDSW256", + argLen: 2, + asm: x86.AVPHADDSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21628,9 +21419,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVW256", + name: "VPHSUBSW256", argLen: 2, - asm: x86.AVPSRLVW, + asm: x86.AVPHSUBSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21642,15 +21433,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVW256", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHRDVW, + name: "VPSUBSW256", + argLen: 2, + asm: x86.AVPSUBSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21658,13 +21447,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVW256", - argLen: 2, - asm: x86.AVPSRAVW, + name: "VPSUBSWMasked256", + argLen: 3, + asm: x86.AVPSUBSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21672,9 +21462,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSIGNW256", + name: "VPSLLW256", argLen: 2, - asm: x86.AVPSIGNW, + asm: x86.AVPSLLW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21686,9 +21476,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBW256", + name: "VPSRLW256", argLen: 2, - asm: x86.AVPSUBW, + asm: x86.AVPSRLW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 
X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21700,12 +21490,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSW512", - argLen: 1, - asm: x86.AVPABSW, + name: "VPSRAW256", + argLen: 2, + asm: x86.AVPSRAW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21713,10 +21504,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDW512", - argLen: 2, - commutative: true, - asm: x86.AVPADDW, + name: "VPSLLVW256", + argLen: 2, + asm: x86.AVPSLLVW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21728,13 +21518,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSWMasked512", - argLen: 2, - asm: x86.AVPABSW, + name: "VPSHLDVW256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21742,15 +21534,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDWMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPADDW, + name: "VPSHLDVWMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21758,10 +21551,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSWMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPMAXSW, + name: "VPSLLVWMasked256", + argLen: 3, + asm: x86.AVPSLLVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21774,15 +21566,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSWMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPMINSW, + name: "VPSRLVW256", + argLen: 2, + asm: x86.AVPSRLVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21790,15 +21580,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHWMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPMULHW, + name: "VPSHRDVW256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // 
X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21806,15 +21596,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLWMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPMULLW, + name: "VPSHRDVWMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21822,9 +21613,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMADDWDMasked512", + name: "VPSRLVWMasked256", argLen: 3, - asm: x86.AVPMADDWD, + asm: x86.AVPSRLVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21837,29 +21628,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTWMasked512", + name: "VPSRAVW256", argLen: 2, - asm: x86.AVPOPCNTW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPADDSWMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPADDSW, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21867,9 +21642,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSWMasked512", + name: "VPSRAVWMasked256", argLen: 3, - asm: x86.AVPSUBSW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21882,14 +21657,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVWMasked512", - argLen: 3, - asm: x86.AVPSLLVW, + name: "VPSIGNW256", + argLen: 2, + asm: x86.AVPSIGNW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21897,16 +21671,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVWMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVW, + name: "VPSUBW256", + argLen: 2, + asm: x86.AVPSUBW, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, 
// X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21914,9 +21685,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVWMasked512", + name: "VPSUBWMasked256", argLen: 3, - asm: x86.AVPSRLVW, + asm: x86.AVPSUBW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -21929,16 +21700,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVWMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVW, + name: "VPABSW512", + argLen: 1, + asm: x86.AVPABSW, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21946,14 +21713,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVWMasked512", - argLen: 3, - asm: x86.AVPSRAVW, + name: "VPABSWMasked512", + argLen: 2, + asm: x86.AVPABSW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21961,14 +21727,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBWMasked512", - argLen: 3, - asm: x86.AVPSUBW, + name: "VPADDW512", + argLen: 2, + commutative: true, + asm: x86.AVPADDW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21976,14 +21742,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSW512", - argLen: 2, + name: "VPADDWMasked512", + argLen: 3, commutative: true, - asm: x86.AVPMAXSW, + asm: x86.AVPADDW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -21991,10 +21758,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSW512", + name: "VPMAXSW512", argLen: 2, commutative: true, - asm: x86.AVPMINSW, + asm: x86.AVPMAXSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22006,14 +21773,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHW512", - argLen: 2, + name: "VPMAXSWMasked512", + argLen: 3, commutative: true, - asm: x86.AVPMULHW, + asm: x86.AVPMAXSW, reg: regInfo{ inputs: []inputInfo{ - {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22021,10 +21789,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLW512", + name: "VPMINSW512", argLen: 2, commutative: true, - asm: x86.AVPMULLW, + asm: x86.AVPMINSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22036,13 +21804,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMADDWD512", - argLen: 2, - asm: x86.AVPMADDWD, + name: "VPMINSWMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPMINSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22050,12 +21820,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTW512", - argLen: 1, - asm: x86.AVPOPCNTW, + name: "VPMULHW512", + argLen: 2, + commutative: true, + asm: x86.AVPMULHW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22063,14 +21835,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSW512", - argLen: 2, + name: "VPMULHWMasked512", + argLen: 3, commutative: true, - asm: x86.AVPADDSW, + asm: x86.AVPMULHW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22078,9 +21851,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSW512", - argLen: 2, - asm: x86.AVPSUBSW, + name: "VPMULLW512", + argLen: 2, + commutative: true, + asm: x86.AVPMULLW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22092,13 +21866,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVW512", - argLen: 2, - asm: x86.AVPSLLVW, + name: "VPMULLWMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPMULLW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22106,15 +21882,13 @@ var opcodeTable = [...]opInfo{ }, }, 
{ - name: "VPSHLDVW512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHLDVW, + name: "VPMADDWD512", + argLen: 2, + asm: x86.AVPMADDWD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22122,13 +21896,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVW512", - argLen: 2, - asm: x86.AVPSRLVW, + name: "VPMADDWDMasked512", + argLen: 3, + asm: x86.AVPMADDWD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22136,15 +21911,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVW512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHRDVW, + name: "VPOPCNTW512", + argLen: 1, + asm: x86.AVPOPCNTW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22152,13 +21924,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVW512", + name: "VPOPCNTWMasked512", argLen: 2, - asm: x86.AVPSRAVW, + asm: x86.AVPOPCNTW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22166,9 +21938,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBW512", - argLen: 2, - asm: x86.AVPSUBW, + name: "VPADDSW512", + argLen: 2, + commutative: true, + asm: x86.AVPADDSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22180,12 +21953,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSW128", - argLen: 1, - asm: x86.AVPABSW, + name: "VPADDSWMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPADDSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22193,10 +21969,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDW128", - argLen: 2, - commutative: true, - asm: x86.AVPADDW, + name: "VPSUBSW512", + argLen: 2, + asm: x86.AVPSUBSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22208,14 +21983,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPEQW128", - argLen: 2, - commutative: 
true, - asm: x86.AVPCMPEQW, + name: "VPSUBSWMasked512", + argLen: 3, + asm: x86.AVPSUBSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22223,9 +21998,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPGTW128", + name: "VPSLLVW512", argLen: 2, - asm: x86.AVPCMPGTW, + asm: x86.AVPSLLVW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22237,13 +22012,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSWMasked128", - argLen: 2, - asm: x86.AVPABSW, + name: "VPSHLDVW512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22251,15 +22028,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDWMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPADDW, + name: "VPSHLDVWMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22267,10 +22045,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSWMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPMAXSW, + name: "VPSLLVWMasked512", + argLen: 3, + asm: x86.AVPSLLVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -22283,15 +22060,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSWMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPMINSW, + name: "VPSRLVW512", + argLen: 2, + asm: x86.AVPSRLVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22299,15 +22074,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHWMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPMULHW, + name: "VPSHRDVW512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22315,15 +22090,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLWMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPMULLW, + name: "VPSHRDVWMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22331,9 +22107,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMADDWDMasked128", + name: "VPSRLVWMasked512", argLen: 3, - asm: x86.AVPMADDWD, + asm: x86.AVPSRLVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -22346,13 +22122,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTWMasked128", + name: "VPSRAVW512", argLen: 2, - asm: x86.AVPOPCNTW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22360,10 +22136,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSWMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPADDSW, + name: "VPSRAVWMasked512", + argLen: 3, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -22376,14 +22151,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSWMasked128", - argLen: 3, - asm: x86.AVPSUBSW, + name: "VPSUBW512", + argLen: 2, + asm: x86.AVPSUBW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22391,9 +22165,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVWMasked128", + name: "VPSUBWMasked512", argLen: 3, - asm: x86.AVPSLLVW, + asm: x86.AVPSUBW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -22406,16 +22180,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVWMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVW, + name: "VPABSW128", + argLen: 1, + asm: x86.AVPABSW, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22423,14 +22193,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVWMasked128", - argLen: 3, - asm: x86.AVPSRLVW, + name: "VPABSWMasked128", + argLen: 2, + asm: x86.AVPABSW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22438,16 +22207,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVWMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVW, + name: "VPADDW128", + argLen: 2, + commutative: true, + asm: x86.AVPADDW, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22455,9 +22222,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVWMasked128", - argLen: 3, - asm: x86.AVPSRAVW, + name: "VPADDWMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPADDW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -22470,14 +22238,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBWMasked128", - argLen: 3, - asm: x86.AVPSUBW, + name: "VPCMPEQW128", + argLen: 2, + commutative: true, + asm: x86.AVPCMPEQW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22485,10 +22253,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSW128", - argLen: 2, - commutative: true, - asm: x86.AVPMAXSW, + name: "VPCMPGTW128", + argLen: 2, + asm: x86.AVPCMPGTW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22500,10 +22267,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSW128", + name: "VPMAXSW128", argLen: 2, commutative: true, - asm: x86.AVPMINSW, + asm: x86.AVPMAXSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22515,14 +22282,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHW128", - argLen: 2, + name: "VPMAXSWMasked128", + argLen: 3, commutative: true, - asm: x86.AVPMULHW, + asm: x86.AVPMAXSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: 
[]outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22530,10 +22298,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLW128", + name: "VPMINSW128", argLen: 2, commutative: true, - asm: x86.AVPMULLW, + asm: x86.AVPMINSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22545,13 +22313,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMADDWD128", - argLen: 2, - asm: x86.AVPMADDWD, + name: "VPMINSWMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPMINSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22559,9 +22329,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPHADDW128", - argLen: 2, - asm: x86.AVPHADDW, + name: "VPMULHW128", + argLen: 2, + commutative: true, + asm: x86.AVPMULHW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22573,13 +22344,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPHSUBW128", - argLen: 2, - asm: x86.AVPHSUBW, + name: "VPMULHWMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPMULHW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22587,12 +22360,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTW128", - argLen: 1, - asm: x86.AVPOPCNTW, + name: "VPMULLW128", + argLen: 2, + commutative: true, + asm: x86.AVPMULLW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22600,14 +22375,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSW128", - argLen: 2, + name: "VPMULLWMasked128", + argLen: 3, commutative: true, - asm: x86.AVPADDSW, + asm: x86.AVPMULLW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22615,9 +22391,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPHADDSW128", + name: "VPMADDWD128", argLen: 2, - asm: x86.AVPHADDSW, + asm: x86.AVPMADDWD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22629,13 +22405,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPHSUBSW128", - argLen: 2, - asm: x86.AVPHSUBSW, + name: 
"VPMADDWDMasked128", + argLen: 3, + asm: x86.AVPMADDWD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22643,9 +22420,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSW128", + name: "VPHADDW128", argLen: 2, - asm: x86.AVPSUBSW, + asm: x86.AVPHADDW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22657,9 +22434,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLW128", + name: "VPHSUBW128", argLen: 2, - asm: x86.AVPSLLW, + asm: x86.AVPHSUBW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22671,13 +22448,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLW128", - argLen: 2, - asm: x86.AVPSRLW, + name: "VPOPCNTW128", + argLen: 1, + asm: x86.AVPOPCNTW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22685,13 +22461,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAW128", + name: "VPOPCNTWMasked128", argLen: 2, - asm: x86.AVPSRAW, + asm: x86.AVPOPCNTW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22699,9 +22475,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVW128", - argLen: 2, - asm: x86.AVPSLLVW, + name: "VPADDSW128", + argLen: 2, + commutative: true, + asm: x86.AVPADDSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22713,15 +22490,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVW128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHLDVW, + name: "VPADDSWMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPADDSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22729,9 +22506,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVW128", + name: "VPHADDSW128", argLen: 2, - asm: x86.AVPSRLVW, + asm: x86.AVPHADDSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22743,15 +22520,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVW128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHRDVW, + name: 
"VPHSUBSW128", + argLen: 2, + asm: x86.AVPHSUBSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22759,9 +22534,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVW128", + name: "VPSUBSW128", argLen: 2, - asm: x86.AVPSRAVW, + asm: x86.AVPSUBSW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22773,13 +22548,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSIGNW128", - argLen: 2, - asm: x86.AVPSIGNW, + name: "VPSUBSWMasked128", + argLen: 3, + asm: x86.AVPSUBSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22787,9 +22563,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBW128", + name: "VPSLLW128", argLen: 2, - asm: x86.AVPSUBW, + asm: x86.AVPSLLW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22801,12 +22577,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSD512", - argLen: 1, - asm: x86.AVPABSD, + name: "VPSRLW128", + argLen: 2, + asm: x86.AVPSRLW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22814,10 +22591,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDD512", - argLen: 2, - commutative: true, - asm: x86.AVPADDD, + name: "VPSRAW128", + argLen: 2, + asm: x86.AVPSRAW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22829,10 +22605,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDD512", - argLen: 2, - commutative: true, - asm: x86.AVPANDD, + name: "VPSLLVW128", + argLen: 2, + asm: x86.AVPSLLVW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22844,13 +22619,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDND512", - argLen: 2, - asm: x86.AVPANDND, + name: "VPSHLDVW128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22858,13 +22635,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSDMasked512", - argLen: 2, - asm: x86.AVPABSD, + name: "VPSHLDVWMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVW, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22872,10 +22652,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPADDD, + name: "VPSLLVWMasked128", + argLen: 3, + asm: x86.AVPSLLVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -22888,15 +22667,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPANDD, + name: "VPSRLVW128", + argLen: 2, + asm: x86.AVPSRLVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22904,14 +22681,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDNDMasked512", - argLen: 3, - asm: x86.AVPANDND, + name: "VPSHRDVW128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22919,15 +22697,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPMAXSD, + name: "VPSHRDVWMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -22935,10 +22714,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPMINSD, + name: "VPSRLVWMasked128", + argLen: 3, + asm: x86.AVPSRLVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -22951,15 +22729,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPMULLD, + name: "VPSRAVW128", + argLen: 2, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 @@ -22967,10 +22743,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPORDMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPORD, + name: "VPSRAVWMasked128", + argLen: 3, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -22983,16 +22758,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPWSSDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPWSSD, + name: "VPSIGNW128", + argLen: 2, + asm: x86.AVPSIGNW, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23000,13 +22772,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTDMasked512", + name: "VPSUBW128", argLen: 2, - asm: x86.AVPOPCNTD, + asm: x86.AVPSUBW, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23014,9 +22786,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLVDMasked512", + name: "VPSUBWMasked128", argLen: 3, - asm: x86.AVPROLVD, + asm: x86.AVPSUBW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23029,14 +22801,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVDMasked512", - argLen: 3, - asm: x86.AVPRORVD, + name: "VPABSD512", + argLen: 1, + asm: x86.AVPABSD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23044,16 +22814,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPWSSDSMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPWSSDS, + name: "VPABSDMasked512", + argLen: 2, + asm: x86.AVPABSD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23061,16 +22828,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSDSMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, + name: "VPADDD512", + argLen: 2, + commutative: true, + asm: x86.AVPADDD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // 
X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23078,9 +22843,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVDMasked512", - argLen: 3, - asm: x86.AVPSLLVD, + name: "VPADDDMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPADDD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23093,16 +22859,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVD, + name: "VPANDD512", + argLen: 2, + commutative: true, + asm: x86.AVPANDD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23110,9 +22874,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVDMasked512", - argLen: 3, - asm: x86.AVPSRLVD, + name: "VPANDDMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPANDD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23125,16 +22890,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVD, + name: "VPANDND512", + argLen: 2, + asm: x86.AVPANDND, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23142,9 +22904,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVDMasked512", + name: "VPANDNDMasked512", argLen: 3, - asm: x86.AVPSRAVD, + asm: x86.AVPANDND, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23157,9 +22919,25 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBDMasked512", - argLen: 3, - asm: x86.AVPSUBD, + name: "VPMAXSD512", + argLen: 2, + commutative: true, + asm: x86.AVPMAXSD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMAXSDMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPMAXSD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23172,16 +22950,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSDMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, + name: "VPMINSD512", + argLen: 2, + commutative: true, + asm: x86.AVPMINSD, reg: regInfo{ inputs: []inputInfo{ - 
{3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23189,10 +22965,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPXORDMasked512", + name: "VPMINSDMasked512", argLen: 3, commutative: true, - asm: x86.AVPXORD, + asm: x86.AVPMINSD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23205,10 +22981,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSD512", + name: "VPMULLD512", argLen: 2, commutative: true, - asm: x86.AVPMAXSD, + asm: x86.AVPMULLD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23220,14 +22996,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSD512", - argLen: 2, + name: "VPMULLDMasked512", + argLen: 3, commutative: true, - asm: x86.AVPMINSD, + asm: x86.AVPMULLD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23235,10 +23012,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLD512", + name: "VPORD512", argLen: 2, commutative: true, - asm: x86.AVPMULLD, + asm: x86.AVPORD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23250,14 +23027,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPORD512", - argLen: 2, + name: "VPORDMasked512", + argLen: 3, commutative: true, asm: x86.AVPORD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23280,6 +23058,23 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPDPWSSDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPWSSD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPOPCNTD512", argLen: 1, @@ -23294,13 +23089,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLVD512", + name: "VPOPCNTDMasked512", argLen: 2, - asm: x86.AVPROLVD, + asm: x86.AVPOPCNTD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 
X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23308,9 +23103,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVD512", + name: "VPROLVD512", argLen: 2, - asm: x86.AVPRORVD, + asm: x86.AVPROLVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23322,15 +23117,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPWSSDS512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPWSSDS, + name: "VPROLVDMasked512", + argLen: 3, + asm: x86.AVPROLVD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23338,15 +23132,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSDS512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSDS, + name: "VPRORVD512", + argLen: 2, + asm: x86.AVPRORVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23354,13 +23146,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVD512", - argLen: 2, - asm: x86.AVPSLLVD, + name: "VPRORVDMasked512", + argLen: 3, + asm: x86.AVPRORVD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23368,10 +23161,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVD512", + name: "VPDPWSSDS512", argLen: 3, resultInArg0: true, - asm: x86.AVPSHLDVD, + asm: x86.AVPDPWSSDS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23384,13 +23177,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVD512", - argLen: 2, - asm: x86.AVPSRLVD, + name: "VPDPWSSDSMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPWSSDS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23398,10 +23194,10 @@ var opcodeTable = 
[...]opInfo{ }, }, { - name: "VPSHRDVD512", + name: "VPDPBUSDS512", argLen: 3, resultInArg0: true, - asm: x86.AVPSHRDVD, + asm: x86.AVPDPBUSDS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23414,13 +23210,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVD512", - argLen: 2, - asm: x86.AVPSRAVD, + name: "VPDPBUSDSMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPBUSDS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23428,9 +23227,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBD512", + name: "VPSLLVD512", argLen: 2, - asm: x86.AVPSUBD, + asm: x86.AVPSLLVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23442,10 +23241,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSD512", + name: "VPSHLDVD512", argLen: 3, resultInArg0: true, - asm: x86.AVPDPBUSD, + asm: x86.AVPSHLDVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23458,27 +23257,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPXORD512", - argLen: 2, - commutative: true, - asm: x86.AVPXORD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPABSD128", - argLen: 1, - asm: x86.AVPABSD, + name: "VPSHLDVDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23486,14 +23274,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDD128", - argLen: 2, - commutative: true, - asm: x86.AVPADDD, + name: "VPSLLVDMasked512", + argLen: 3, + asm: x86.AVPSLLVD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23501,10 +23289,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPEQD128", - argLen: 2, - commutative: true, - asm: x86.AVPCMPEQD, + name: "VPSRLVD512", + argLen: 2, + asm: x86.AVPSRLVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 @@ -23516,13 +23303,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPGTD128", - argLen: 2, - asm: x86.AVPCMPGTD, + name: "VPSHRDVD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23530,29 +23319,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSDMasked128", - argLen: 2, - asm: x86.AVPABSD, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPADDDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPADDD, + name: "VPSHRDVDMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23560,10 +23336,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPANDD, + name: "VPSRLVDMasked512", + argLen: 3, + asm: x86.AVPSRLVD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23576,14 +23351,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDNDMasked128", - argLen: 3, - asm: x86.AVPANDND, + name: "VPSRAVD512", + argLen: 2, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23591,10 +23365,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPMAXSD, + name: "VPSRAVDMasked512", + argLen: 3, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23607,15 +23380,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPMINSD, + name: "VPSUBD512", + argLen: 2, + asm: x86.AVPSUBD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23623,10 +23394,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLDMasked128", - 
argLen: 3, - commutative: true, - asm: x86.AVPMULLD, + name: "VPSUBDMasked512", + argLen: 3, + asm: x86.AVPSUBD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23639,15 +23409,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPORDMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPORD, + name: "VPDPBUSD512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPDPBUSD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23655,10 +23425,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPWSSDMasked128", + name: "VPDPBUSDMasked512", argLen: 4, resultInArg0: true, - asm: x86.AVPDPWSSD, + asm: x86.AVPDPBUSD, reg: regInfo{ inputs: []inputInfo{ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23672,13 +23442,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTDMasked128", - argLen: 2, - asm: x86.AVPOPCNTD, + name: "VPXORD512", + argLen: 2, + commutative: true, + asm: x86.AVPXORD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23686,9 +23457,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLVDMasked128", - argLen: 3, - asm: x86.AVPROLVD, + name: "VPXORDMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPXORD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23701,14 +23473,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVDMasked128", - argLen: 3, - asm: x86.AVPRORVD, + name: "VPABSD128", + argLen: 1, + asm: x86.AVPABSD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23716,16 +23486,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPWSSDSMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPWSSDS, + name: "VPABSDMasked128", + argLen: 2, + asm: x86.AVPABSD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23733,16 +23500,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSDSMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, + name: "VPADDD128", + argLen: 2, + commutative: true, + asm: 
x86.AVPADDD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23750,9 +23515,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVDMasked128", - argLen: 3, - asm: x86.AVPSLLVD, + name: "VPADDDMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPADDD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23765,16 +23531,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVDMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVD, + name: "VPANDDMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPANDD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23782,9 +23547,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVDMasked128", + name: "VPANDNDMasked128", argLen: 3, - asm: x86.AVPSRLVD, + asm: x86.AVPANDND, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23797,16 +23562,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVDMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVD, + name: "VPCMPEQD128", + argLen: 2, + commutative: true, + asm: x86.AVPCMPEQD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23814,14 +23577,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVDMasked128", - argLen: 3, - asm: x86.AVPSRAVD, + name: "VPCMPGTD128", + argLen: 2, + asm: x86.AVPCMPGTD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23829,9 +23591,25 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBDMasked128", - argLen: 3, - asm: x86.AVPSUBD, + name: "VPMAXSD128", + argLen: 2, + commutative: true, + asm: x86.AVPMAXSD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
+ }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMAXSDMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPMAXSD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23844,16 +23622,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSDMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, + name: "VPMINSD128", + argLen: 2, + commutative: true, + asm: x86.AVPMINSD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23861,10 +23637,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPXORDMasked128", + name: "VPMINSDMasked128", argLen: 3, commutative: true, - asm: x86.AVPXORD, + asm: x86.AVPMINSD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -23877,10 +23653,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSD128", + name: "VPMULDQ128", argLen: 2, commutative: true, - asm: x86.AVPMAXSD, + asm: x86.AVPMULDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23892,10 +23668,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSD128", + name: "VPMULLD128", argLen: 2, commutative: true, - asm: x86.AVPMINSD, + asm: x86.AVPMULLD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23907,14 +23683,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULDQ128", - argLen: 2, + name: "VPMULLDMasked128", + argLen: 3, commutative: true, - asm: x86.AVPMULDQ, + asm: x86.AVPMULLD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23922,14 +23699,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLD128", - argLen: 2, + name: "VPORDMasked128", + argLen: 3, commutative: true, - asm: x86.AVPMULLD, + asm: x86.AVPORD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -23952,6 +23730,23 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPDPWSSDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPWSSD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // 
X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPHADDD128", argLen: 2, @@ -23994,13 +23789,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLVD128", + name: "VPOPCNTDMasked128", argLen: 2, - asm: x86.AVPROLVD, + asm: x86.AVPOPCNTD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24008,29 +23803,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVD128", + name: "VPROLVD128", argLen: 2, - asm: x86.AVPRORVD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPDPWSSDS128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPWSSDS, + asm: x86.AVPROLVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24038,15 +23817,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSDS128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPDPBUSDS, + name: "VPROLVDMasked128", + argLen: 3, + asm: x86.AVPROLVD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24054,9 +23832,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLD128", + name: "VPRORVD128", argLen: 2, - asm: x86.AVPSLLD, + asm: x86.AVPRORVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24068,13 +23846,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLD128", - argLen: 2, - asm: x86.AVPSRLD, + name: "VPRORVDMasked128", + argLen: 3, + asm: x86.AVPRORVD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24082,13 +23861,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAD128", - argLen: 2, - asm: x86.AVPSRAD, + name: "VPDPWSSDS128", + argLen: 3, + resultInArg0: 
true, + asm: x86.AVPDPWSSDS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24096,13 +23877,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVD128", - argLen: 2, - asm: x86.AVPSLLVD, + name: "VPDPWSSDSMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPWSSDS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24110,10 +23894,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVD128", + name: "VPDPBUSDS128", argLen: 3, resultInArg0: true, - asm: x86.AVPSHLDVD, + asm: x86.AVPDPBUSDS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24126,13 +23910,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVD128", - argLen: 2, - asm: x86.AVPSRLVD, + name: "VPDPBUSDSMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPBUSDS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24140,15 +23927,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVD128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHRDVD, + name: "VPSLLD128", + argLen: 2, + asm: x86.AVPSLLD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24156,9 +23941,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVD128", + name: "VPSRLD128", argLen: 2, - asm: x86.AVPSRAVD, + asm: x86.AVPSRLD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24170,9 +23955,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSIGND128", + name: "VPSRAD128", argLen: 2, - asm: x86.AVPSIGND, + asm: x86.AVPSRAD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24184,9 +23969,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBD128", + name: "VPSLLVD128", argLen: 2, - asm: x86.AVPSUBD, + asm: x86.AVPSLLVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24198,10 +23983,10 @@ var opcodeTable = [...]opInfo{ }, }, { - 
name: "VPDPBUSD128", + name: "VPSHLDVD128", argLen: 3, resultInArg0: true, - asm: x86.AVPDPBUSD, + asm: x86.AVPSHLDVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24214,12 +23999,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSD256", - argLen: 1, - asm: x86.AVPABSD, + name: "VPSHLDVDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24227,14 +24016,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDD256", - argLen: 2, - commutative: true, - asm: x86.AVPADDD, + name: "VPSLLVDMasked128", + argLen: 3, + asm: x86.AVPSLLVD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24242,10 +24031,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPEQD256", - argLen: 2, - commutative: true, - asm: x86.AVPCMPEQD, + name: "VPSRLVD128", + argLen: 2, + asm: x86.AVPSRLVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24257,13 +24045,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPGTD256", - argLen: 2, - asm: x86.AVPCMPGTD, + name: "VPSHRDVD128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24271,13 +24061,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSDMasked256", - argLen: 2, - asm: x86.AVPABSD, + name: "VPSHRDVDMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24285,10 +24078,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPADDD, + name: "VPSRLVDMasked128", + argLen: 3, + asm: x86.AVPSRLVD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24301,15 +24093,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPANDD, + name: "VPSRAVD128", + argLen: 2, + asm: x86.AVPSRAVD, reg: 
regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24317,9 +24107,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDNDMasked256", + name: "VPSRAVDMasked128", argLen: 3, - asm: x86.AVPANDND, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24332,15 +24122,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPMAXSD, + name: "VPSIGND128", + argLen: 2, + asm: x86.AVPSIGND, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24348,15 +24136,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPMINSD, + name: "VPSUBD128", + argLen: 2, + asm: x86.AVPSUBD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24364,10 +24150,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPMULLD, + name: "VPSUBDMasked128", + argLen: 3, + asm: x86.AVPSUBD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24380,15 +24165,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPORDMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPORD, + name: "VPDPBUSD128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPDPBUSD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24396,10 +24181,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPWSSDMasked256", + name: "VPDPBUSDMasked128", argLen: 4, resultInArg0: true, - asm: x86.AVPDPWSSD, + asm: x86.AVPDPBUSD, reg: regInfo{ inputs: []inputInfo{ {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24413,23 +24198,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTDMasked256", - argLen: 2, - asm: x86.AVPOPCNTD, - reg: regInfo{ - inputs: []inputInfo{ - {1, 
1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPROLVDMasked256", - argLen: 3, - asm: x86.AVPROLVD, + name: "VPXORDMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPXORD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24442,14 +24214,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVDMasked256", - argLen: 3, - asm: x86.AVPRORVD, + name: "VPABSD256", + argLen: 1, + asm: x86.AVPABSD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24457,16 +24227,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPWSSDSMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPWSSDS, + name: "VPABSDMasked256", + argLen: 2, + asm: x86.AVPABSD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24474,16 +24241,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSDSMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSDS, + name: "VPADDD256", + argLen: 2, + commutative: true, + asm: x86.AVPADDD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24491,9 +24256,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVDMasked256", - argLen: 3, - asm: x86.AVPSLLVD, + name: "VPADDDMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPADDD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24506,16 +24272,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVDMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVD, + name: "VPANDDMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPANDD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24523,9 +24288,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVDMasked256", + name: "VPANDNDMasked256", argLen: 3, - asm: x86.AVPSRLVD, + asm: 
x86.AVPANDND, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24538,16 +24303,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVDMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVD, + name: "VPCMPEQD256", + argLen: 2, + commutative: true, + asm: x86.AVPCMPEQD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24555,14 +24318,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVDMasked256", - argLen: 3, - asm: x86.AVPSRAVD, + name: "VPCMPGTD256", + argLen: 2, + asm: x86.AVPCMPGTD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24570,9 +24332,25 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBDMasked256", - argLen: 3, - asm: x86.AVPSUBD, + name: "VPMAXSD256", + argLen: 2, + commutative: true, + asm: x86.AVPMAXSD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMAXSDMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPMAXSD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24585,16 +24363,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPDPBUSDMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPDPBUSD, + name: "VPMINSD256", + argLen: 2, + commutative: true, + asm: x86.AVPMINSD, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24602,10 +24378,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPXORDMasked256", + name: "VPMINSDMasked256", argLen: 3, commutative: true, - asm: x86.AVPXORD, + asm: x86.AVPMINSD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -24618,10 +24394,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSD256", + name: "VPMULDQ256", argLen: 2, commutative: true, - asm: x86.AVPMAXSD, + asm: x86.AVPMULDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24633,10 +24409,10 @@ var opcodeTable = [...]opInfo{ 
}, }, { - name: "VPMINSD256", + name: "VPMULLD256", argLen: 2, commutative: true, - asm: x86.AVPMINSD, + asm: x86.AVPMULLD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24648,14 +24424,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULDQ256", - argLen: 2, + name: "VPMULLDMasked256", + argLen: 3, commutative: true, - asm: x86.AVPMULDQ, + asm: x86.AVPMULLD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24663,14 +24440,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLD256", - argLen: 2, + name: "VPORDMasked256", + argLen: 3, commutative: true, - asm: x86.AVPMULLD, + asm: x86.AVPORD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24693,6 +24471,23 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPDPWSSDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPWSSD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPHADDD256", argLen: 2, @@ -24734,6 +24529,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPOPCNTDMasked256", + argLen: 2, + asm: x86.AVPOPCNTD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPROLVD256", argLen: 2, @@ -24748,6 +24557,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPROLVDMasked256", + argLen: 3, + asm: x86.AVPROLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPRORVD256", argLen: 2, @@ -24762,6 +24586,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPRORVDMasked256", + argLen: 3, + asm: x86.AVPRORVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 
X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPWSSDS256", argLen: 3, @@ -24778,6 +24617,23 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPDPWSSDSMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPWSSDS, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPBUSDS256", argLen: 3, @@ -24794,6 +24650,23 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPDPBUSDSMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPBUSDS, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSLLD256", argLen: 2, @@ -24866,6 +24739,38 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDVDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLVDMasked256", + argLen: 3, + asm: x86.AVPSLLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSRLVD256", argLen: 2, @@ -24896,6 +24801,38 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHRDVDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVD, + reg: regInfo{ + inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked256", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSRAVD256", argLen: 2, @@ -24910,6 +24847,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRAVDMasked256", + argLen: 3, + asm: x86.AVPSRAVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSIGND256", argLen: 2, @@ -24938,6 +24890,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSUBDMasked256", + argLen: 3, + asm: x86.AVPSUBD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPDPBUSD256", argLen: 3, @@ -24955,12 +24922,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSQ128", - argLen: 1, - asm: x86.AVPABSQ, + name: "VPDPBUSDMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPDPBUSD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24968,14 +24939,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDQ128", - argLen: 2, + name: "VPXORDMasked256", + argLen: 3, commutative: true, - asm: x86.AVPADDQ, + asm: x86.AVPXORD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24983,14 +24955,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPEQQ128", - argLen: 2, - commutative: true, - asm: x86.AVPCMPEQQ, + name: "VPABSQ128", + argLen: 1, + asm: x86.AVPABSQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25011,6 +24981,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPADDQ128", + argLen: 2, + commutative: true, + asm: x86.AVPADDQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPADDQMasked128", argLen: 3, @@ -25059,15 +25044,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSQMasked128", - argLen: 3, + name: "VPCMPEQQ128", + argLen: 2, commutative: true, - asm: x86.AVPMAXSQ, + asm: x86.AVPCMPEQQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 
X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25075,15 +25059,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSQMasked128", - argLen: 3, + name: "VPMAXSQ128", + argLen: 2, commutative: true, - asm: x86.AVPMINSQ, + asm: x86.AVPMAXSQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25091,10 +25074,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULDQMasked128", + name: "VPMAXSQMasked128", argLen: 3, commutative: true, - asm: x86.AVPMULDQ, + asm: x86.AVPMAXSQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25107,15 +25090,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLQMasked128", - argLen: 3, + name: "VPMINSQ128", + argLen: 2, commutative: true, - asm: x86.AVPMULLQ, + asm: x86.AVPMINSQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25123,10 +25105,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPORQMasked128", + name: "VPMINSQMasked128", argLen: 3, commutative: true, - asm: x86.AVPORQ, + asm: x86.AVPMINSQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25139,23 +25121,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTQMasked128", - argLen: 2, - asm: x86.AVPOPCNTQ, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPROLVQMasked128", - argLen: 3, - asm: x86.AVPROLVQ, + name: "VPMULDQMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPMULDQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25168,14 +25137,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVQMasked128", - argLen: 3, - asm: x86.AVPRORVQ, + name: "VPMULLQ128", + argLen: 2, + commutative: true, + asm: x86.AVPMULLQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25183,9 +25152,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLQMasked128", - argLen: 3, - asm: x86.AVPSLLQ, + name: "VPMULLQMasked128", + argLen: 3, + commutative: 
true, + asm: x86.AVPMULLQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25198,9 +25168,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLQMasked128", - argLen: 3, - asm: x86.AVPSRLQ, + name: "VPORQMasked128", + argLen: 3, + commutative: true, + asm: x86.AVPORQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25213,14 +25184,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAQMasked128", - argLen: 3, - asm: x86.AVPSRAQ, + name: "VPOPCNTQ128", + argLen: 1, + asm: x86.AVPOPCNTQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25228,14 +25197,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVQMasked128", - argLen: 3, - asm: x86.AVPSLLVQ, + name: "VPOPCNTQMasked128", + argLen: 2, + asm: x86.AVPOPCNTQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25243,16 +25211,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVQMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVQ, + name: "VPROLVQ128", + argLen: 2, + asm: x86.AVPROLVQ, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25260,9 +25225,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQMasked128", + name: "VPROLVQMasked128", argLen: 3, - asm: x86.AVPSRLVQ, + asm: x86.AVPROLVQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25275,16 +25240,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVQMasked128", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVQ, + name: "VPRORVQ128", + argLen: 2, + asm: x86.AVPRORVQ, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25292,9 +25254,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVQMasked128", + name: "VPRORVQMasked128", argLen: 3, - asm: x86.AVPSRAVQ, + asm: x86.AVPRORVQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25307,14 +25269,13 @@ var opcodeTable = 
[...]opInfo{ }, }, { - name: "VPSUBQMasked128", - argLen: 3, - asm: x86.AVPSUBQ, + name: "VPSLLQ128", + argLen: 2, + asm: x86.AVPSLLQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25322,10 +25283,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPXORQMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPXORQ, + name: "VPSLLQMasked128", + argLen: 3, + asm: x86.AVPSLLQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25338,10 +25298,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSQ128", - argLen: 2, - commutative: true, - asm: x86.AVPMAXSQ, + name: "VPSRLQ128", + argLen: 2, + asm: x86.AVPSRLQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25353,14 +25312,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSQ128", - argLen: 2, - commutative: true, - asm: x86.AVPMINSQ, + name: "VPSRLQMasked128", + argLen: 3, + asm: x86.AVPSRLQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25368,10 +25327,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLQ128", - argLen: 2, - commutative: true, - asm: x86.AVPMULLQ, + name: "VPSRAQ128", + argLen: 2, + asm: x86.AVPSRAQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25383,12 +25341,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTQ128", - argLen: 1, - asm: x86.AVPOPCNTQ, + name: "VPSRAQMasked128", + argLen: 3, + asm: x86.AVPSRAQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25396,9 +25356,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLVQ128", + name: "VPSLLVQ128", argLen: 2, - asm: x86.AVPROLVQ, + asm: x86.AVPSLLVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25410,13 +25370,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVQ128", - argLen: 2, - asm: x86.AVPRORVQ, + name: "VPSHLDVQ128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25424,13 
+25386,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLQ128", - argLen: 2, - asm: x86.AVPSLLQ, + name: "VPSHLDVQMasked128", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25438,13 +25403,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLQ128", - argLen: 2, - asm: x86.AVPSRLQ, + name: "VPSLLVQMasked128", + argLen: 3, + asm: x86.AVPSLLVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25452,9 +25418,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAQ128", + name: "VPSRLVQ128", argLen: 2, - asm: x86.AVPSRAQ, + asm: x86.AVPSRLVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25466,13 +25432,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVQ128", - argLen: 2, - asm: x86.AVPSLLVQ, + name: "VPSHRDVQ128", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25480,15 +25448,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVQ128", - argLen: 3, + name: "VPSHRDVQMasked128", + argLen: 4, resultInArg0: true, - asm: x86.AVPSHLDVQ, + asm: x86.AVPSHRDVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25496,13 +25465,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ128", - argLen: 2, + name: "VPSRLVQMasked128", + argLen: 3, asm: x86.AVPSRLVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ 
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25510,15 +25480,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVQ128", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHRDVQ, + name: "VPSRAVQ128", + argLen: 2, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25526,13 +25494,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVQ128", - argLen: 2, + name: "VPSRAVQMasked128", + argLen: 3, asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25554,12 +25523,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSQ256", - argLen: 1, - asm: x86.AVPABSQ, + name: "VPSUBQMasked128", + argLen: 3, + asm: x86.AVPSUBQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25567,14 +25538,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDQ256", - argLen: 2, + name: "VPXORQMasked128", + argLen: 3, commutative: true, - asm: x86.AVPADDQ, + asm: x86.AVPXORQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25582,14 +25554,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPEQQ256", - argLen: 2, - commutative: true, - asm: x86.AVPCMPEQQ, + name: "VPABSQ256", + argLen: 1, + asm: x86.AVPABSQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25597,13 +25567,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPGTQ256", + name: "VPABSQMasked256", argLen: 2, - asm: x86.AVPCMPGTQ, + asm: x86.AVPABSQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25611,13 +25581,14 @@ var opcodeTable = 
[...]opInfo{ }, }, { - name: "VPABSQMasked256", - argLen: 2, - asm: x86.AVPABSQ, + name: "VPADDQ256", + argLen: 2, + commutative: true, + asm: x86.AVPADDQ, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25671,6 +25642,50 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCMPEQQ256", + argLen: 2, + commutative: true, + asm: x86.AVPCMPEQQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPCMPGTQ256", + argLen: 2, + asm: x86.AVPCMPGTQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPMAXSQ256", + argLen: 2, + commutative: true, + asm: x86.AVPMAXSQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXSQMasked256", argLen: 3, @@ -25687,6 +25702,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMINSQ256", + argLen: 2, + commutative: true, + asm: x86.AVPMINSQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMINSQMasked256", argLen: 3, @@ -25720,15 +25750,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLQMasked256", - argLen: 3, + name: "VPMULLQ256", + argLen: 2, commutative: true, asm: x86.AVPMULLQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25736,10 +25765,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPORQMasked256", + name: "VPMULLQMasked256", argLen: 3, commutative: true, - asm: x86.AVPORQ, + asm: x86.AVPMULLQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25752,13 +25781,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTQMasked256", - argLen: 2, - asm: x86.AVPOPCNTQ, + name: "VPORQMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPORQ, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 
X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25766,14 +25797,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLVQMasked256", - argLen: 3, - asm: x86.AVPROLVQ, + name: "VPOPCNTQ256", + argLen: 1, + asm: x86.AVPOPCNTQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25781,14 +25810,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVQMasked256", - argLen: 3, - asm: x86.AVPRORVQ, + name: "VPOPCNTQMasked256", + argLen: 2, + asm: x86.AVPOPCNTQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25796,14 +25824,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLQMasked256", - argLen: 3, - asm: x86.AVPSLLQ, + name: "VPROLVQ256", + argLen: 2, + asm: x86.AVPROLVQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25811,9 +25838,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLQMasked256", + name: "VPROLVQMasked256", argLen: 3, - asm: x86.AVPSRLQ, + asm: x86.AVPROLVQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25826,14 +25853,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAQMasked256", - argLen: 3, - asm: x86.AVPSRAQ, + name: "VPRORVQ256", + argLen: 2, + asm: x86.AVPRORVQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25841,9 +25867,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVQMasked256", + name: "VPRORVQMasked256", argLen: 3, - asm: x86.AVPSLLVQ, + asm: x86.AVPRORVQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25856,16 +25882,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVQMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVQ, + name: "VPSLLQ256", + argLen: 2, + asm: x86.AVPSLLQ, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - 
{2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25873,9 +25896,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQMasked256", + name: "VPSLLQMasked256", argLen: 3, - asm: x86.AVPSRLVQ, + asm: x86.AVPSLLQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25888,16 +25911,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVQMasked256", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVQ, + name: "VPSRLQ256", + argLen: 2, + asm: x86.AVPSRLQ, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25905,9 +25925,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVQMasked256", + name: "VPSRLQMasked256", argLen: 3, - asm: x86.AVPSRAVQ, + asm: x86.AVPSRLQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25920,14 +25940,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBQMasked256", - argLen: 3, - asm: x86.AVPSUBQ, + name: "VPSRAQ256", + argLen: 2, + asm: x86.AVPSRAQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25935,10 +25954,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPXORQMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPXORQ, + name: "VPSRAQMasked256", + argLen: 3, + asm: x86.AVPSRAQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -25951,10 +25969,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSQ256", - argLen: 2, - commutative: true, - asm: x86.AVPMAXSQ, + name: "VPSLLVQ256", + argLen: 2, + asm: x86.AVPSLLVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25966,14 +25983,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSQ256", - argLen: 2, - commutative: true, - asm: x86.AVPMINSQ, + name: "VPSHLDVQ256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25981,14 +25999,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLQ256", - argLen: 2, - commutative: true, - asm: x86.AVPMULLQ, + name: "VPSHLDVQMasked256", + argLen: 4, + resultInArg0: true, + 
asm: x86.AVPSHLDVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -25996,12 +26016,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTQ256", - argLen: 1, - asm: x86.AVPOPCNTQ, + name: "VPSLLVQMasked256", + argLen: 3, + asm: x86.AVPSLLVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26009,9 +26031,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLVQ256", + name: "VPSRLVQ256", argLen: 2, - asm: x86.AVPROLVQ, + asm: x86.AVPSRLVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26023,13 +26045,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVQ256", - argLen: 2, - asm: x86.AVPRORVQ, + name: "VPSHRDVQ256", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26037,13 +26061,31 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLQ256", - argLen: 2, - asm: x86.AVPSLLQ, + name: "VPSHRDVQMasked256", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVQ, reg: regInfo{ inputs: []inputInfo{ + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked256", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26051,9 +26093,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLQ256", + name: "VPSRAVQ256", argLen: 2, - asm: x86.AVPSRLQ, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26065,13 +26107,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAQ256", - argLen: 2, - asm: x86.AVPSRAQ, + name: "VPSRAVQMasked256", + argLen: 3, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26079,9 +26122,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVQ256", + name: "VPSUBQ256", argLen: 2, - asm: x86.AVPSLLVQ, + asm: x86.AVPSUBQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26093,15 +26136,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVQ256", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHLDVQ, + name: "VPSUBQMasked256", + argLen: 3, + asm: x86.AVPSUBQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26109,13 +26151,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ256", - argLen: 2, - asm: x86.AVPSRLVQ, + name: "VPXORQMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPXORQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26123,15 +26167,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVQ256", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHRDVQ, + name: "VPABSQ512", + argLen: 1, + asm: x86.AVPABSQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26139,13 +26180,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVQ256", + name: "VPABSQMasked512", argLen: 2, - asm: x86.AVPSRAVQ, + asm: x86.AVPABSQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26153,9 +26194,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBQ256", - argLen: 2, - asm: x86.AVPSUBQ, + name: "VPADDQ512", + argLen: 2, + commutative: true, + asm: x86.AVPADDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26167,12 +26209,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSQ512", - argLen: 1, - 
asm: x86.AVPABSQ, + name: "VPADDQMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPADDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26180,10 +26225,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDQ512", + name: "VPANDQ512", argLen: 2, commutative: true, - asm: x86.AVPADDQ, + asm: x86.AVPANDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26195,14 +26240,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDQ512", - argLen: 2, + name: "VPANDQMasked512", + argLen: 3, commutative: true, asm: x86.AVPANDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26224,13 +26270,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSQMasked512", - argLen: 2, - asm: x86.AVPABSQ, + name: "VPANDNQMasked512", + argLen: 3, + asm: x86.AVPANDNQ, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26238,15 +26285,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDQMasked512", - argLen: 3, + name: "VPMAXSQ512", + argLen: 2, commutative: true, - asm: x86.AVPADDQ, + asm: x86.AVPMAXSQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26254,10 +26300,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDQMasked512", + name: "VPMAXSQMasked512", argLen: 3, commutative: true, - asm: x86.AVPANDQ, + asm: x86.AVPMAXSQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -26270,14 +26316,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPANDNQMasked512", - argLen: 3, - asm: x86.AVPANDNQ, + name: "VPMINSQ512", + argLen: 2, + commutative: true, + asm: x86.AVPMINSQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 @@ -26285,10 +26331,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSQMasked512", + name: "VPMINSQMasked512", argLen: 3, commutative: true, - asm: x86.AVPMAXSQ, + asm: x86.AVPMINSQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -26301,15 +26347,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSQMasked512", - argLen: 3, + name: "VPMULDQ512", + argLen: 2, commutative: true, - asm: x86.AVPMINSQ, + asm: x86.AVPMULDQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26333,15 +26378,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLQMasked512", - argLen: 3, + name: "VPMULLQ512", + argLen: 2, commutative: true, asm: x86.AVPMULLQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26349,10 +26393,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPORQMasked512", + name: "VPMULLQMasked512", argLen: 3, commutative: true, - asm: x86.AVPORQ, + asm: x86.AVPMULLQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -26365,28 +26409,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTQMasked512", - argLen: 2, - asm: x86.AVPOPCNTQ, + name: "VPORQ512", + argLen: 2, + commutative: true, + asm: x86.AVPORQ, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPROLVQMasked512", - argLen: 3, - asm: x86.AVPROLVQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26394,9 +26424,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVQMasked512", - argLen: 3, - asm: x86.AVPRORVQ, + name: "VPORQMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPORQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -26409,14 +26440,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLQMasked512", - argLen: 3, - asm: x86.AVPSLLQ, + name: "VPOPCNTQ512", + argLen: 1, + asm: x86.AVPOPCNTQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 
X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26424,14 +26453,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLQMasked512", - argLen: 3, - asm: x86.AVPSRLQ, + name: "VPOPCNTQMasked512", + argLen: 2, + asm: x86.AVPOPCNTQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26439,14 +26467,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAQMasked512", - argLen: 3, - asm: x86.AVPSRAQ, + name: "VPROLVQ512", + argLen: 2, + asm: x86.AVPROLVQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26454,9 +26481,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVQMasked512", + name: "VPROLVQMasked512", argLen: 3, - asm: x86.AVPSLLVQ, + asm: x86.AVPROLVQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -26469,16 +26496,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVQMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHLDVQ, + name: "VPRORVQ512", + argLen: 2, + asm: x86.AVPRORVQ, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26486,9 +26510,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQMasked512", + name: "VPRORVQMasked512", argLen: 3, - asm: x86.AVPSRLVQ, + asm: x86.AVPRORVQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -26501,16 +26525,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVQMasked512", - argLen: 4, - resultInArg0: true, - asm: x86.AVPSHRDVQ, + name: "VPSLLQ512", + argLen: 2, + asm: x86.AVPSLLQ, reg: regInfo{ inputs: []inputInfo{ - {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26518,9 +26539,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVQMasked512", + name: "VPSLLQMasked512", argLen: 3, - asm: x86.AVPSRAVQ, + asm: x86.AVPSLLQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ 
-26533,14 +26554,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBQMasked512", - argLen: 3, - asm: x86.AVPSUBQ, + name: "VPSRLQ512", + argLen: 2, + asm: x86.AVPSRLQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26548,10 +26568,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPXORQMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPXORQ, + name: "VPSRLQMasked512", + argLen: 3, + asm: x86.AVPSRLQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -26564,10 +26583,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSQ512", - argLen: 2, - commutative: true, - asm: x86.AVPMAXSQ, + name: "VPSRAQ512", + argLen: 2, + asm: x86.AVPSRAQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26579,14 +26597,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSQ512", - argLen: 2, - commutative: true, - asm: x86.AVPMINSQ, + name: "VPSRAQMasked512", + argLen: 3, + asm: x86.AVPSRAQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26594,10 +26612,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULDQ512", - argLen: 2, - commutative: true, - asm: x86.AVPMULDQ, + name: "VPSLLVQ512", + argLen: 2, + asm: x86.AVPSLLVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26609,14 +26626,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULLQ512", - argLen: 2, - commutative: true, - asm: x86.AVPMULLQ, + name: "VPSHLDVQ512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHLDVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26624,14 +26642,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPORQ512", - argLen: 2, - commutative: true, - asm: x86.AVPORQ, + name: "VPSHLDVQMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHLDVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26639,12 +26659,14 @@ var 
opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTQ512", - argLen: 1, - asm: x86.AVPOPCNTQ, + name: "VPSLLVQMasked512", + argLen: 3, + asm: x86.AVPSLLVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26652,9 +26674,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLVQ512", + name: "VPSRLVQ512", argLen: 2, - asm: x86.AVPROLVQ, + asm: x86.AVPSRLVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26666,13 +26688,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORVQ512", - argLen: 2, - asm: x86.AVPRORVQ, + name: "VPSHRDVQ512", + argLen: 3, + resultInArg0: true, + asm: x86.AVPSHRDVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26680,13 +26704,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLQ512", - argLen: 2, - asm: x86.AVPSLLQ, + name: "VPSHRDVQMasked512", + argLen: 4, + resultInArg0: true, + asm: x86.AVPSHRDVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {3, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26694,13 +26721,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLQ512", - argLen: 2, - asm: x86.AVPSRLQ, + name: "VPSRLVQMasked512", + argLen: 3, + asm: x86.AVPSRLVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26708,9 +26736,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAQ512", + name: "VPSRAVQ512", argLen: 2, - asm: x86.AVPSRAQ, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26722,13 +26750,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSLLVQ512", - argLen: 2, - asm: x86.AVPSLLVQ, + name: "VPSRAVQMasked512", + argLen: 3, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26736,15 +26765,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDVQ512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHLDVQ, + name: "VPSUBQ512", + argLen: 2, + asm: x86.AVPSUBQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26752,13 +26779,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ512", - argLen: 2, - asm: x86.AVPSRLVQ, + name: "VPSUBQMasked512", + argLen: 3, + asm: x86.AVPSUBQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26766,15 +26794,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDVQ512", - argLen: 3, - resultInArg0: true, - asm: x86.AVPSHRDVQ, + name: "VPXORQ512", + argLen: 2, + commutative: true, + asm: x86.AVPXORQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {2, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26782,13 +26809,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRAVQ512", - argLen: 2, - asm: x86.AVPSRAVQ, + name: "VPXORQMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPXORQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26796,13 +26825,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBQ512", - argLen: 2, - asm: x86.AVPSUBQ, + name: "VPABSB128", + argLen: 1, + asm: x86.AVPABSB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26810,14 +26838,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPXORQ512", - argLen: 2, - commutative: true, - asm: x86.AVPXORQ, + name: "VPABSBMasked128", + argLen: 2, + asm: x86.AVPABSB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 @@ -26825,12 +26852,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSB128", - argLen: 1, - asm: x86.AVPABSB, + name: "VPADDB128", + argLen: 2, + commutative: true, + asm: x86.AVPADDB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26838,14 +26867,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDB128", - argLen: 2, + name: "VPADDBMasked128", + argLen: 3, commutative: true, asm: x86.AVPADDB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26911,29 +26941,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSBMasked128", - argLen: 2, - asm: x86.AVPABSB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPADDBMasked128", - argLen: 3, + name: "VPMAXSB128", + argLen: 2, commutative: true, - asm: x86.AVPADDB, + asm: x86.AVPMAXSB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26957,29 +26972,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSBMasked128", - argLen: 3, + name: "VPMINSB128", + argLen: 2, commutative: true, asm: x86.AVPMINSB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPOPCNTBMasked128", - argLen: 2, - asm: x86.AVPOPCNTB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -26987,10 +26987,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSBMasked128", + name: "VPMINSBMasked128", argLen: 3, commutative: true, - asm: x86.AVPADDSB, + asm: x86.AVPMINSB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -27003,14 +27003,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSBMasked128", - argLen: 3, - asm: x86.AVPSUBSB, + name: "VPOR128", + argLen: 2, + commutative: true, + asm: x86.AVPOR, reg: regInfo{ inputs: []inputInfo{ - {2, 
1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27018,14 +27018,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBBMasked128", - argLen: 3, - asm: x86.AVPSUBB, + name: "VPOPCNTB128", + argLen: 1, + asm: x86.AVPOPCNTB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27033,14 +27031,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSB128", - argLen: 2, - commutative: true, - asm: x86.AVPMAXSB, + name: "VPOPCNTBMasked128", + argLen: 2, + asm: x86.AVPOPCNTB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27048,10 +27045,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSB128", + name: "VPADDSB128", argLen: 2, commutative: true, - asm: x86.AVPMINSB, + asm: x86.AVPADDSB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27063,14 +27060,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOR128", - argLen: 2, + name: "VPADDSBMasked128", + argLen: 3, commutative: true, - asm: x86.AVPOR, + asm: x86.AVPADDSB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27078,12 +27076,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTB128", - argLen: 1, - asm: x86.AVPOPCNTB, + name: "VPSUBSB128", + argLen: 2, + asm: x86.AVPSUBSB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27091,14 +27090,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSB128", - argLen: 2, - commutative: true, - asm: x86.AVPADDSB, + name: "VPSUBSBMasked128", + argLen: 3, + asm: x86.AVPSUBSB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 
X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27106,9 +27105,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSB128", + name: "VPSIGNB128", argLen: 2, - asm: x86.AVPSUBSB, + asm: x86.AVPSIGNB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27120,9 +27119,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSIGNB128", + name: "VPSUBB128", argLen: 2, - asm: x86.AVPSIGNB, + asm: x86.AVPSUBB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27134,13 +27133,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBB128", - argLen: 2, + name: "VPSUBBMasked128", + argLen: 3, asm: x86.AVPSUBB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27175,6 +27175,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPABSBMasked256", + argLen: 2, + asm: x86.AVPABSB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPADDB256", argLen: 2, @@ -27190,6 +27204,22 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPADDBMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPADDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPAND256", argLen: 2, @@ -27249,29 +27279,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSBMasked256", - argLen: 2, - asm: x86.AVPABSB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPADDBMasked256", - argLen: 3, + name: "VPMAXSB256", + argLen: 2, commutative: true, - asm: x86.AVPADDB, + asm: x86.AVPMAXSB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27295,29 +27310,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSBMasked256", - argLen: 3, + name: "VPMINSB256", + argLen: 2, commutative: true, asm: x86.AVPMINSB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPOPCNTBMasked256", - argLen: 2, - asm: x86.AVPOPCNTB, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27325,10 +27325,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSBMasked256", + name: "VPMINSBMasked256", argLen: 3, commutative: true, - asm: x86.AVPADDSB, + asm: x86.AVPMINSB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -27341,14 +27341,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSBMasked256", - argLen: 3, - asm: x86.AVPSUBSB, + name: "VPOR256", + argLen: 2, + commutative: true, + asm: x86.AVPOR, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27356,14 +27356,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBBMasked256", - argLen: 3, - asm: x86.AVPSUBB, + name: "VPOPCNTB256", + argLen: 1, + asm: x86.AVPOPCNTB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27371,14 +27369,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSB256", - argLen: 2, - commutative: true, - asm: x86.AVPMAXSB, + name: "VPOPCNTBMasked256", + argLen: 2, + asm: x86.AVPOPCNTB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27386,10 +27383,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSB256", + name: "VPADDSB256", argLen: 2, commutative: true, - asm: x86.AVPMINSB, + asm: x86.AVPADDSB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27401,14 +27398,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOR256", - argLen: 2, + name: "VPADDSBMasked256", + argLen: 3, commutative: true, - asm: x86.AVPOR, + asm: x86.AVPADDSB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, 
outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27416,12 +27414,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTB256", - argLen: 1, - asm: x86.AVPOPCNTB, + name: "VPSUBSB256", + argLen: 2, + asm: x86.AVPSUBSB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27429,14 +27428,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSB256", - argLen: 2, - commutative: true, - asm: x86.AVPADDSB, + name: "VPSUBSBMasked256", + argLen: 3, + asm: x86.AVPSUBSB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27444,9 +27443,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSB256", + name: "VPSIGNB256", argLen: 2, - asm: x86.AVPSUBSB, + asm: x86.AVPSIGNB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27458,9 +27457,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSIGNB256", + name: "VPSUBB256", argLen: 2, - asm: x86.AVPSIGNB, + asm: x86.AVPSUBB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27472,13 +27471,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBB256", - argLen: 2, + name: "VPSUBBMasked256", + argLen: 3, asm: x86.AVPSUBB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27514,14 +27514,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDB512", - argLen: 2, - commutative: true, - asm: x86.AVPADDB, + name: "VPABSBMasked512", + argLen: 2, + asm: x86.AVPABSB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27529,13 +27528,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPABSBMasked512", - argLen: 2, - asm: x86.AVPABSB, + name: "VPADDB512", + argLen: 2, + commutative: true, + asm: x86.AVPADDB, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ 
-27559,15 +27559,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSBMasked512", - argLen: 3, + name: "VPMAXSB512", + argLen: 2, commutative: true, asm: x86.AVPMAXSB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27575,10 +27574,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSBMasked512", + name: "VPMAXSBMasked512", argLen: 3, commutative: true, - asm: x86.AVPMINSB, + asm: x86.AVPMAXSB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -27591,13 +27590,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTBMasked512", - argLen: 2, - asm: x86.AVPOPCNTB, + name: "VPMINSB512", + argLen: 2, + commutative: true, + asm: x86.AVPMINSB, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27605,10 +27605,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSBMasked512", + name: "VPMINSBMasked512", argLen: 3, commutative: true, - asm: x86.AVPADDSB, + asm: x86.AVPMINSB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -27621,14 +27621,12 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSBMasked512", - argLen: 3, - asm: x86.AVPSUBSB, + name: "VPOPCNTB512", + argLen: 1, + asm: x86.AVPOPCNTB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27636,14 +27634,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBBMasked512", - argLen: 3, - asm: x86.AVPSUBB, + name: "VPOPCNTBMasked512", + argLen: 2, + asm: x86.AVPOPCNTB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27651,10 +27648,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXSB512", + name: "VPADDSB512", argLen: 2, commutative: true, - asm: x86.AVPMAXSB, + asm: x86.AVPADDSB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27666,14 +27663,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINSB512", - argLen: 2, + name: "VPADDSBMasked512", + argLen: 3, commutative: true, - asm: x86.AVPMINSB, + asm: x86.AVPADDSB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 
X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27681,12 +27679,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPOPCNTB512", - argLen: 1, - asm: x86.AVPOPCNTB, + name: "VPSUBSB512", + argLen: 2, + asm: x86.AVPSUBSB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27694,14 +27693,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPADDSB512", - argLen: 2, - commutative: true, - asm: x86.AVPADDSB, + name: "VPSUBSBMasked512", + argLen: 3, + asm: x86.AVPSUBSB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27709,9 +27708,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBSB512", + name: "VPSUBB512", argLen: 2, - asm: x86.AVPSUBSB, + asm: x86.AVPSUBB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27723,13 +27722,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSUBB512", - argLen: 2, + name: "VPSUBBMasked512", + argLen: 3, asm: x86.AVPSUBB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27768,15 +27768,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUWMasked256", - argLen: 3, + name: "VPMAXUW256", + argLen: 2, commutative: true, asm: x86.AVPMAXUW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27784,10 +27783,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUWMasked256", + name: "VPMAXUWMasked256", argLen: 3, commutative: true, - asm: x86.AVPMINUW, + asm: x86.AVPMAXUW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -27800,15 +27799,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHUWMasked256", - argLen: 3, + name: "VPMINUW256", + argLen: 2, commutative: true, - asm: x86.AVPMULHUW, + asm: x86.AVPMINUW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 
X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27816,14 +27814,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUW256", - argLen: 2, + name: "VPMINUWMasked256", + argLen: 3, commutative: true, - asm: x86.AVPMAXUW, + asm: x86.AVPMINUW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27831,10 +27830,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUW256", + name: "VPMULHUW256", argLen: 2, commutative: true, - asm: x86.AVPMINUW, + asm: x86.AVPMULHUW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27846,14 +27845,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHUW256", - argLen: 2, + name: "VPMULHUWMasked256", + argLen: 3, commutative: true, asm: x86.AVPMULHUW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27892,15 +27892,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUWMasked512", - argLen: 3, + name: "VPMAXUW512", + argLen: 2, commutative: true, asm: x86.AVPMAXUW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27908,10 +27907,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUWMasked512", + name: "VPMAXUWMasked512", argLen: 3, commutative: true, - asm: x86.AVPMINUW, + asm: x86.AVPMAXUW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -27924,15 +27923,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHUWMasked512", - argLen: 3, + name: "VPMINUW512", + argLen: 2, commutative: true, - asm: x86.AVPMULHUW, + asm: x86.AVPMINUW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27940,14 
+27938,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUW512", - argLen: 2, + name: "VPMINUWMasked512", + argLen: 3, commutative: true, - asm: x86.AVPMAXUW, + asm: x86.AVPMINUW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27955,10 +27954,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUW512", + name: "VPMULHUW512", argLen: 2, commutative: true, - asm: x86.AVPMINUW, + asm: x86.AVPMULHUW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -27970,14 +27969,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHUW512", - argLen: 2, + name: "VPMULHUWMasked512", + argLen: 3, commutative: true, asm: x86.AVPMULHUW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28015,6 +28015,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMAXUW128", + argLen: 2, + commutative: true, + asm: x86.AVPMAXUW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUWMasked128", argLen: 3, @@ -28032,15 +28047,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUWMasked128", - argLen: 3, + name: "VPMINUW128", + argLen: 2, commutative: true, asm: x86.AVPMINUW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28048,10 +28062,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHUWMasked128", + name: "VPMINUWMasked128", argLen: 3, commutative: true, - asm: x86.AVPMULHUW, + asm: x86.AVPMINUW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -28064,10 +28078,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUW128", + name: "VPMULHUW128", argLen: 2, commutative: true, - asm: x86.AVPMAXUW, + asm: x86.AVPMULHUW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28079,14 +28093,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUW128", - argLen: 2, + name: "VPMULHUWMasked128", + argLen: 3, commutative: true, - asm: x86.AVPMINUW, + asm: x86.AVPMULHUW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 
X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28094,10 +28109,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULHUW128", + name: "VPMAXUD512", argLen: 2, commutative: true, - asm: x86.AVPMULHUW, + asm: x86.AVPMAXUD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28125,15 +28140,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUDMasked512", - argLen: 3, + name: "VPMINUD512", + argLen: 2, commutative: true, asm: x86.AVPMINUD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28141,14 +28155,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUD512", - argLen: 2, + name: "VPMINUDMasked512", + argLen: 3, commutative: true, - asm: x86.AVPMAXUD, + asm: x86.AVPMINUD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28156,10 +28171,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUD512", + name: "VPMAXUD128", argLen: 2, commutative: true, - asm: x86.AVPMINUD, + asm: x86.AVPMAXUD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28187,15 +28202,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUDMasked128", - argLen: 3, + name: "VPMINUD128", + argLen: 2, commutative: true, asm: x86.AVPMINUD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28203,14 +28217,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUD128", - argLen: 2, + name: "VPMINUDMasked128", + argLen: 3, commutative: true, - asm: x86.AVPMAXUD, + asm: x86.AVPMINUD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 
X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28218,10 +28233,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUD128", + name: "VPMULUDQ128", argLen: 2, commutative: true, - asm: x86.AVPMINUD, + asm: x86.AVPMULUDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28233,10 +28248,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULUDQ128", + name: "VPMAXUD256", argLen: 2, commutative: true, - asm: x86.AVPMULUDQ, + asm: x86.AVPMAXUD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28264,15 +28279,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUDMasked256", - argLen: 3, + name: "VPMINUD256", + argLen: 2, commutative: true, asm: x86.AVPMINUD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28280,14 +28294,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUD256", - argLen: 2, + name: "VPMINUDMasked256", + argLen: 3, commutative: true, - asm: x86.AVPMAXUD, + asm: x86.AVPMINUD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28295,10 +28310,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUD256", + name: "VPMULUDQ256", argLen: 2, commutative: true, - asm: x86.AVPMINUD, + asm: x86.AVPMULUDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28310,10 +28325,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULUDQ256", + name: "VPMAXUQ128", argLen: 2, commutative: true, - asm: x86.AVPMULUDQ, + asm: x86.AVPMAXUQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28340,6 +28355,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMINUQ128", + argLen: 2, + commutative: true, + asm: x86.AVPMINUQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMINUQMasked128", argLen: 3, @@ -28373,7 +28403,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUQ128", + name: "VPMAXUQ256", argLen: 2, commutative: true, asm: x86.AVPMAXUQ, @@ -28388,14 +28418,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUQ128", - argLen: 2, + name: "VPMAXUQMasked256", + argLen: 3, commutative: true, - asm: x86.AVPMINUQ, + asm: x86.AVPMAXUQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 
K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28403,15 +28434,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUQMasked256", - argLen: 3, + name: "VPMINUQ256", + argLen: 2, commutative: true, - asm: x86.AVPMAXUQ, + asm: x86.AVPMINUQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28451,7 +28481,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUQ256", + name: "VPMAXUQ512", argLen: 2, commutative: true, asm: x86.AVPMAXUQ, @@ -28465,21 +28495,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPMINUQ256", - argLen: 2, - commutative: true, - asm: x86.AVPMINUQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, { name: "VPMAXUQMasked512", argLen: 3, @@ -28497,15 +28512,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUQMasked512", - argLen: 3, + name: "VPMINUQ512", + argLen: 2, commutative: true, asm: x86.AVPMINUQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28513,10 +28527,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULUDQMasked512", + name: "VPMINUQMasked512", argLen: 3, commutative: true, - asm: x86.AVPMULUDQ, + asm: x86.AVPMINUQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -28529,10 +28543,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUQ512", + name: "VPMULUDQ512", argLen: 2, commutative: true, - asm: x86.AVPMAXUQ, + asm: x86.AVPMULUDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28544,14 +28558,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUQ512", - argLen: 2, + name: "VPMULUDQMasked512", + argLen: 3, commutative: true, - asm: x86.AVPMINUQ, + asm: x86.AVPMULUDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28559,10 +28574,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMULUDQ512", + name: "VPAVGB128", argLen: 
2, commutative: true, - asm: x86.AVPMULUDQ, + asm: x86.AVPAVGB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28574,14 +28589,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPAVGB128", - argLen: 2, + name: "VPAVGBMasked128", + argLen: 3, commutative: true, asm: x86.AVPAVGB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28602,22 +28618,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPAVGBMasked128", - argLen: 3, - commutative: true, - asm: x86.AVPAVGB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, { name: "VGF2P8MULBMasked128", argLen: 3, @@ -28634,15 +28634,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUBMasked128", - argLen: 3, + name: "VPMAXUB128", + argLen: 2, commutative: true, asm: x86.AVPMAXUB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28650,25 +28649,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUBMasked128", + name: "VPMAXUBMasked128", argLen: 3, commutative: true, - asm: x86.AVPMINUB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPMADDUBSWMasked128", - argLen: 3, - asm: x86.AVPMADDUBSW, + asm: x86.AVPMAXUB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -28681,10 +28665,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUB128", + name: "VPMINUB128", argLen: 2, commutative: true, - asm: x86.AVPMAXUB, + asm: x86.AVPMINUB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28696,14 +28680,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUB128", - argLen: 2, + name: "VPMINUBMasked128", + argLen: 3, commutative: true, asm: x86.AVPMINUB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28725,14 +28710,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPAVGB256", - argLen: 2, - commutative: true, - asm: x86.AVPAVGB, + name: "VPMADDUBSWMasked128", + argLen: 3, + asm: x86.AVPMADDUBSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28740,9 +28725,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VGF2P8MULB256", - argLen: 2, - asm: x86.AVGF2P8MULB, + name: "VPAVGB256", + argLen: 2, + commutative: true, + asm: x86.AVPAVGB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28770,14 +28756,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VGF2P8MULBMasked256", - argLen: 3, + name: "VGF2P8MULB256", + argLen: 2, asm: x86.AVGF2P8MULB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28785,10 +28770,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUBMasked256", - argLen: 3, - commutative: true, - asm: x86.AVPMAXUB, + name: "VGF2P8MULBMasked256", + argLen: 3, + asm: x86.AVGF2P8MULB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -28801,15 +28785,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUBMasked256", - argLen: 3, + name: "VPMAXUB256", + argLen: 2, commutative: true, - asm: x86.AVPMINUB, + asm: x86.AVPMAXUB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28817,9 +28800,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMADDUBSWMasked256", - argLen: 3, - asm: x86.AVPMADDUBSW, + name: "VPMAXUBMasked256", + argLen: 3, + commutative: true, + asm: x86.AVPMAXUB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -28832,10 +28816,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUB256", + name: "VPMINUB256", argLen: 2, commutative: true, - asm: x86.AVPMAXUB, + asm: x86.AVPMINUB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28847,14 +28831,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUB256", - argLen: 2, + name: "VPMINUBMasked256", + argLen: 3, commutative: true, asm: x86.AVPMINUB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 
X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28876,14 +28861,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPAVGB512", - argLen: 2, - commutative: true, - asm: x86.AVPAVGB, + name: "VPMADDUBSWMasked256", + argLen: 3, + asm: x86.AVPMADDUBSW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28891,9 +28876,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VGF2P8MULB512", - argLen: 2, - asm: x86.AVGF2P8MULB, + name: "VPAVGB512", + argLen: 2, + commutative: true, + asm: x86.AVPAVGB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28921,14 +28907,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VGF2P8MULBMasked512", - argLen: 3, + name: "VGF2P8MULB512", + argLen: 2, asm: x86.AVGF2P8MULB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28936,10 +28921,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUBMasked512", - argLen: 3, - commutative: true, - asm: x86.AVPMAXUB, + name: "VGF2P8MULBMasked512", + argLen: 3, + asm: x86.AVGF2P8MULB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -28952,15 +28936,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUBMasked512", - argLen: 3, + name: "VPMAXUB512", + argLen: 2, commutative: true, - asm: x86.AVPMINUB, + asm: x86.AVPMAXUB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28968,9 +28951,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMADDUBSWMasked512", - argLen: 3, - asm: x86.AVPMADDUBSW, + name: "VPMAXUBMasked512", + argLen: 3, + commutative: true, + asm: x86.AVPMAXUB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -28983,10 +28967,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMAXUB512", + name: "VPMINUB512", argLen: 2, commutative: true, - asm: x86.AVPMAXUB, + asm: x86.AVPMINUB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -28998,14 +28982,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMINUB512", - 
argLen: 2, + name: "VPMINUBMasked512", + argLen: 3, commutative: true, asm: x86.AVPMINUB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29026,6 +29011,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPMADDUBSWMasked512", + argLen: 3, + asm: x86.AVPMADDUBSW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VRNDSCALEPS512", auxType: auxInt8, @@ -29041,13 +29041,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPS512", + name: "VRNDSCALEPSMasked512", auxType: auxInt8, - argLen: 1, - asm: x86.AVREDUCEPS, + argLen: 2, + asm: x86.AVRNDSCALEPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29055,26 +29056,24 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VCMPPS512", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVCMPPS, + name: "VREDUCEPS512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VRNDSCALEPSMasked512", + name: "VREDUCEPSMasked512", auxType: auxInt8, argLen: 2, - asm: x86.AVRNDSCALEPS, + asm: x86.AVREDUCEPS, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29086,17 +29085,18 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPSMasked512", - auxType: auxInt8, - argLen: 2, - asm: x86.AVREDUCEPS, + name: "VCMPPS512", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVCMPPS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, @@ -29146,13 +29146,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPS128", + name: "VRNDSCALEPSMasked128", auxType: auxInt8, - argLen: 1, - asm: x86.AVREDUCEPS, + argLen: 2, + asm: x86.AVRNDSCALEPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: 
[]outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29160,15 +29161,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VCMPPS128", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVCMPPS, + name: "VREDUCEPS128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29176,10 +29175,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRNDSCALEPSMasked128", + name: "VREDUCEPSMasked128", auxType: auxInt8, argLen: 2, - asm: x86.AVRNDSCALEPS, + asm: x86.AVREDUCEPS, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29191,14 +29190,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPSMasked128", - auxType: auxInt8, - argLen: 2, - asm: x86.AVREDUCEPS, + name: "VCMPPS128", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVCMPPS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29251,29 +29251,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPS256", + name: "VRNDSCALEPSMasked256", auxType: auxInt8, - argLen: 1, - asm: x86.AVREDUCEPS, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VCMPPS256", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVCMPPS, + argLen: 2, + asm: x86.AVRNDSCALEPS, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29281,10 +29266,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VEXTRACTF128128", + name: "VREDUCEPS256", auxType: auxInt8, argLen: 1, - asm: x86.AVEXTRACTF128, + asm: x86.AVREDUCEPS, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29295,10 +29280,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRNDSCALEPSMasked256", + name: "VREDUCEPSMasked256", auxType: auxInt8, argLen: 2, - asm: x86.AVRNDSCALEPS, + asm: x86.AVREDUCEPS, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29310,14 +29295,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPSMasked256", - auxType: auxInt8, - argLen: 2, - asm: x86.AVREDUCEPS, + name: "VCMPPS256", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVCMPPS, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 
X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29341,6 +29327,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VEXTRACTF128128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVEXTRACTF128, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VINSERTF128256", auxType: auxInt8, @@ -29385,13 +29385,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPD128", + name: "VRNDSCALEPDMasked128", auxType: auxInt8, - argLen: 1, - asm: x86.AVREDUCEPD, + argLen: 2, + asm: x86.AVRNDSCALEPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29399,15 +29400,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VDPPD128", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVDPPD, + name: "VREDUCEPD128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29415,15 +29414,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VCMPPD128", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVCMPPD, + name: "VREDUCEPDMasked128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVREDUCEPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29431,14 +29429,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRNDSCALEPDMasked128", - auxType: auxInt8, - argLen: 2, - asm: x86.AVRNDSCALEPD, + name: "VDPPD128", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVDPPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29446,14 +29445,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPDMasked128", - auxType: auxInt8, - argLen: 2, - asm: x86.AVREDUCEPD, + name: "VCMPPD128", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVCMPPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29506,13 +29506,14 @@ var opcodeTable = 
[...]opInfo{ }, }, { - name: "VREDUCEPD256", + name: "VRNDSCALEPDMasked256", auxType: auxInt8, - argLen: 1, - asm: x86.AVREDUCEPD, + argLen: 2, + asm: x86.AVRNDSCALEPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29520,15 +29521,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VCMPPD256", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVCMPPD, + name: "VREDUCEPD256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29536,10 +29535,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VRNDSCALEPDMasked256", + name: "VREDUCEPDMasked256", auxType: auxInt8, argLen: 2, - asm: x86.AVRNDSCALEPD, + asm: x86.AVREDUCEPD, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29551,14 +29550,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPDMasked256", - auxType: auxInt8, - argLen: 2, - asm: x86.AVREDUCEPD, + name: "VCMPPD256", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVCMPPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29597,13 +29597,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPD512", + name: "VRNDSCALEPDMasked512", auxType: auxInt8, - argLen: 1, - asm: x86.AVREDUCEPD, + argLen: 2, + asm: x86.AVRNDSCALEPD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29611,26 +29612,24 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VCMPPD512", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVCMPPD, + name: "VREDUCEPD512", + auxType: auxInt8, + argLen: 1, + asm: x86.AVREDUCEPD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VRNDSCALEPDMasked512", + name: "VREDUCEPDMasked512", auxType: auxInt8, argLen: 2, - asm: x86.AVRNDSCALEPD, + asm: x86.AVREDUCEPD, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29642,17 +29641,18 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VREDUCEPDMasked512", - auxType: auxInt8, - argLen: 2, - asm: x86.AVREDUCEPD, + name: "VCMPPD512", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVCMPPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // 
K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, @@ -29674,31 +29674,31 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPW256", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPCMPW, + name: "VPCMPWMasked256", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, - }, - }, - { - name: "VPCMPWMasked256", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPW, + }, + }, + { + name: "VPCMPW256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29706,15 +29706,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDWMasked256", + name: "VPSHLDW256", auxType: auxInt8, - argLen: 3, + argLen: 2, asm: x86.AVPSHLDW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29722,10 +29721,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDWMasked256", + name: "VPSHLDWMasked256", auxType: auxInt8, argLen: 3, - asm: x86.AVPSHRDW, + asm: x86.AVPSHLDW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29738,10 +29737,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDW256", + name: "VPSHRDW256", auxType: auxInt8, argLen: 2, - asm: x86.AVPSHLDW, + asm: x86.AVPSHRDW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29753,14 +29752,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDW256", + name: "VPSHRDWMasked256", auxType: auxInt8, - argLen: 2, + argLen: 3, asm: x86.AVPSHRDW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 @@ -29801,15 +29801,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDWMasked512", + name: "VPSHLDW512", auxType: auxInt8, - argLen: 3, + argLen: 2, asm: x86.AVPSHLDW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29817,10 +29816,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDWMasked512", + name: "VPSHLDWMasked512", auxType: auxInt8, argLen: 3, - asm: x86.AVPSHRDW, + asm: x86.AVPSHLDW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29833,10 +29832,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDW512", + name: "VPSHRDW512", auxType: auxInt8, argLen: 2, - asm: x86.AVPSHLDW, + asm: x86.AVPSHRDW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29848,20 +29847,38 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDW512", + name: "VPSHRDWMasked512", auxType: auxInt8, - argLen: 2, + argLen: 3, asm: x86.AVPSHRDW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, + { + name: "VPCMPWMasked128", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + }, + }, + }, { name: "VPEXTRW128", auxType: auxInt8, @@ -29892,32 +29909,29 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPWMasked128", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPW, + name: "VPINSRW128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPINSRW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VPSHLDWMasked128", + name: "VPSHLDW128", auxType: auxInt8, - argLen: 3, + argLen: 2, asm: x86.AVPSHLDW, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29925,10 +29939,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDWMasked128", + name: "VPSHLDWMasked128", auxType: auxInt8, argLen: 3, - asm: x86.AVPSHRDW, + asm: x86.AVPSHLDW, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -29941,25 +29955,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPINSRW128", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPINSRW, - reg: regInfo{ - inputs: []inputInfo{ - {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSHLDW128", + name: "VPSHRDW128", auxType: auxInt8, argLen: 2, - asm: x86.AVPSHLDW, + asm: x86.AVPSHRDW, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -29971,14 +29970,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDW128", + name: "VPSHRDWMasked128", auxType: auxInt8, - argLen: 2, + argLen: 3, asm: x86.AVPSHRDW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30019,14 +30019,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLDMasked512", + name: "VPROLD512", auxType: auxInt8, - argLen: 2, + argLen: 1, asm: x86.AVPROLD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30034,10 +30033,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORDMasked512", + name: "VPROLDMasked512", auxType: auxInt8, argLen: 2, - asm: x86.AVPRORD, + asm: x86.AVPROLD, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -30049,45 +30048,28 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDDMasked512", + name: "VPRORD512", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHLDD, + argLen: 1, + asm: x86.AVPRORD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, - }, - }, - { - name: "VPSHRDDMasked512", - auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHRDD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VPROLD512", + name: "VPRORDMasked512", auxType: auxInt8, - argLen: 1, - asm: x86.AVPROLD, + argLen: 2, + asm: x86.AVPRORD, reg: regInfo{ inputs: 
[]inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30095,13 +30077,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORD512", + name: "VPSHLDD512", auxType: auxInt8, - argLen: 1, - asm: x86.AVPRORD, + argLen: 2, + asm: x86.AVPSHLDD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30109,14 +30092,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDD512", + name: "VPSHLDDMasked512", auxType: auxInt8, - argLen: 2, + argLen: 3, asm: x86.AVPSHLDD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30139,31 +30123,18 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPEXTRD128", + name: "VPSHRDDMasked512", auxType: auxInt8, - argLen: 1, - asm: x86.AVPEXTRD, + argLen: 3, + asm: x86.AVPSHRDD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - }, - }, - }, - { - name: "VPCMPD128", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPCMPD, - reg: regInfo{ - inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, @@ -30185,45 +30156,42 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLDMasked128", + name: "VPEXTRD128", auxType: auxInt8, - argLen: 2, - asm: x86.AVPROLD, + argLen: 1, + asm: x86.AVPEXTRD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 }, }, }, { - name: "VPRORDMasked128", + name: "VPCMPD128", auxType: auxInt8, argLen: 2, - asm: x86.AVPRORD, + asm: x86.AVPCMPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, { - name: "VPSHLDDMasked128", + 
name: "VPROLD128", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHLDD, + argLen: 1, + asm: x86.AVPROLD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30231,15 +30199,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDDMasked128", + name: "VPROLDMasked128", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHRDD, + argLen: 2, + asm: x86.AVPROLD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30247,10 +30214,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLD128", + name: "VPRORD128", auxType: auxInt8, argLen: 1, - asm: x86.AVPROLD, + asm: x86.AVPRORD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30261,13 +30228,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORD128", + name: "VPRORDMasked128", auxType: auxInt8, - argLen: 1, + argLen: 2, asm: x86.AVPRORD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30305,14 +30273,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDD128", + name: "VPSHLDDMasked128", auxType: auxInt8, - argLen: 2, - asm: x86.AVPSHRDD, + argLen: 3, + asm: x86.AVPSHLDD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30320,26 +30289,25 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPD256", + name: "VPSHRDD128", auxType: auxInt8, argLen: 2, - asm: x86.AVPCMPD, + asm: x86.AVPSHRDD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VPCMPDMasked256", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPD, + name: "VPSHRDDMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHRDD, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -30347,50 +30315,50 @@ var opcodeTable = [...]opInfo{ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, 
}, }, { - name: "VPROLDMasked256", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPROLD, + name: "VPCMPDMasked256", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, { - name: "VPRORDMasked256", + name: "VPCMPD256", auxType: auxInt8, argLen: 2, - asm: x86.AVPRORD, + asm: x86.AVPCMPD, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, { - name: "VPSHLDDMasked256", + name: "VPROLD256", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHLDD, + argLen: 1, + asm: x86.AVPROLD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30398,15 +30366,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDDMasked256", + name: "VPROLDMasked256", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHRDD, + argLen: 2, + asm: x86.AVPROLD, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30414,10 +30381,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLD256", + name: "VPRORD256", auxType: auxInt8, argLen: 1, - asm: x86.AVPROLD, + asm: x86.AVPRORD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30428,13 +30395,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORD256", + name: "VPRORDMasked256", auxType: auxInt8, - argLen: 1, + argLen: 2, asm: x86.AVPRORD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30457,14 +30425,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDD256", + name: "VPSHLDDMasked256", auxType: auxInt8, - argLen: 2, - asm: x86.AVPSHRDD, + argLen: 3, + asm: x86.AVPSHLDD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30472,31 +30441,33 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPEXTRQ128", + name: "VPSHRDD256", auxType: auxInt8, - argLen: 1, - asm: x86.AVPEXTRQ, + argLen: 2, + asm: x86.AVPSHRDD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VPCMPQ128", + name: "VPSHRDDMasked256", auxType: auxInt8, - argLen: 2, - asm: x86.AVPCMPQ, + argLen: 3, + asm: x86.AVPSHRDD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, @@ -30518,45 +30489,42 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLQMasked128", + name: "VPEXTRQ128", auxType: auxInt8, - argLen: 2, - asm: x86.AVPROLQ, + argLen: 1, + asm: x86.AVPEXTRQ, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 }, }, }, { - name: "VPRORQMasked128", + name: "VPCMPQ128", auxType: auxInt8, argLen: 2, - asm: x86.AVPRORQ, + asm: x86.AVPCMPQ, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, { - name: "VPSHLDQMasked128", + name: "VPROLQ128", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHLDQ, + argLen: 1, + asm: x86.AVPROLQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30564,15 +30532,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDQMasked128", + name: "VPROLQMasked128", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHRDQ, + argLen: 2, + asm: x86.AVPROLQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: 
[]outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30580,10 +30547,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLQ128", + name: "VPRORQ128", auxType: auxInt8, argLen: 1, - asm: x86.AVPROLQ, + asm: x86.AVPRORQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30594,13 +30561,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORQ128", + name: "VPRORQMasked128", auxType: auxInt8, - argLen: 1, + argLen: 2, asm: x86.AVPRORQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30637,6 +30605,22 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSHLDQMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVPSHLDQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPSHRDQ128", auxType: auxInt8, @@ -30653,17 +30637,18 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPQ256", + name: "VPSHRDQMasked128", auxType: auxInt8, - argLen: 2, - asm: x86.AVPCMPQ, + argLen: 3, + asm: x86.AVPSHRDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, @@ -30684,6 +30669,35 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCMPQ256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + }, + }, + }, + { + name: "VPROLQ256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPROLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPROLQMasked256", auxType: auxInt8, @@ -30699,6 +30713,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPRORQ256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPRORQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPRORQMasked256", auxType: auxInt8, @@ -30715,15 +30743,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDQMasked256", + name: "VPSHLDQ256", auxType: auxInt8, - argLen: 3, + argLen: 2, asm: x86.AVPSHLDQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 
K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30731,10 +30758,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDQMasked256", + name: "VPSHLDQMasked256", auxType: auxInt8, argLen: 3, - asm: x86.AVPSHRDQ, + asm: x86.AVPSHLDQ, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -30747,38 +30774,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLQ256", - auxType: auxInt8, - argLen: 1, - asm: x86.AVPROLQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPRORQ256", - auxType: auxInt8, - argLen: 1, - asm: x86.AVPRORQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSHLDQ256", + name: "VPSHRDQ256", auxType: auxInt8, argLen: 2, - asm: x86.AVPSHLDQ, + asm: x86.AVPSHRDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30790,14 +30789,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDQ256", + name: "VPSHRDQMasked256", auxType: auxInt8, - argLen: 2, + argLen: 3, asm: x86.AVPSHRDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30838,14 +30838,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLQMasked512", + name: "VPROLQ512", auxType: auxInt8, - argLen: 2, + argLen: 1, asm: x86.AVPROLQ, reg: regInfo{ inputs: []inputInfo{ - {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30853,10 +30852,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORQMasked512", + name: "VPROLQMasked512", auxType: auxInt8, argLen: 2, - asm: x86.AVPRORQ, + asm: x86.AVPROLQ, reg: regInfo{ inputs: []inputInfo{ {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -30868,15 +30867,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDQMasked512", + name: "VPRORQ512", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHLDQ, + argLen: 1, + asm: x86.AVPRORQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 @@ -30884,15 +30881,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDQMasked512", + name: "VPRORQMasked512", auxType: auxInt8, - argLen: 3, - asm: x86.AVPSHRDQ, + argLen: 2, + asm: x86.AVPRORQ, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30900,13 +30896,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPROLQ512", + name: "VPSHLDQ512", auxType: auxInt8, - argLen: 1, - asm: x86.AVPROLQ, + argLen: 2, + asm: x86.AVPSHLDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30914,13 +30911,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPRORQ512", + name: "VPSHLDQMasked512", auxType: auxInt8, - argLen: 1, - asm: x86.AVPRORQ, + argLen: 3, + asm: x86.AVPSHLDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30928,10 +30927,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHLDQ512", + name: "VPSHRDQ512", auxType: auxInt8, argLen: 2, - asm: x86.AVPSHLDQ, + asm: x86.AVPSHRDQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -30943,20 +30942,38 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSHRDQ512", + name: "VPSHRDQMasked512", auxType: auxInt8, - argLen: 2, + argLen: 3, asm: x86.AVPSHRDQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, + { + name: "VPCMPBMasked128", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + }, + }, + }, { name: "VPEXTRB128", auxType: auxInt8, @@ -30987,34 +31004,34 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPBMasked128", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPB, + name: "VPINSRB128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPINSRB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 
R12 R13 R15 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VPINSRB128", - auxType: auxInt8, - argLen: 2, - asm: x86.AVPINSRB, + name: "VPCMPBMasked256", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPB, reg: regInfo{ inputs: []inputInfo{ - {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, @@ -31047,23 +31064,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPBMasked256", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VINSERTI128256", auxType: auxInt8, @@ -31425,6 +31425,23 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCMPUBMasked128", + auxType: auxInt8, + argLen: 3, + commutative: true, + asm: x86.AVPCMPUB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + }, + }, + }, { name: "VGF2P8AFFINEQB128", auxType: auxInt8, @@ -31456,11 +31473,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUBMasked128", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPUB, + name: "VGF2P8AFFINEINVQBMasked128", + auxType: auxInt8, + argLen: 3, + asm: x86.AVGF2P8AFFINEINVQB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31468,7 +31484,7 @@ var opcodeTable = [...]opInfo{ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, @@ -31489,31 +31505,32 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VGF2P8AFFINEINVQBMasked128", - auxType: auxInt8, - argLen: 3, - asm: x86.AVGF2P8AFFINEINVQB, + name: "VPCMPUB256", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVPCMPUB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, { - name: "VPCMPUB256", + name: "VPCMPUBMasked256", auxType: auxInt8, - argLen: 2, + argLen: 3, commutative: true, asm: x86.AVPCMPUB, reg: 
regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31551,11 +31568,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUBMasked256", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPUB, + name: "VGF2P8AFFINEINVQBMasked256", + auxType: auxInt8, + argLen: 3, + asm: x86.AVGF2P8AFFINEINVQB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31563,7 +31579,7 @@ var opcodeTable = [...]opInfo{ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, @@ -31584,31 +31600,32 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VGF2P8AFFINEINVQBMasked256", - auxType: auxInt8, - argLen: 3, - asm: x86.AVGF2P8AFFINEINVQB, + name: "VPCMPUB512", + auxType: auxInt8, + argLen: 2, + commutative: true, + asm: x86.AVPCMPUB, reg: regInfo{ inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 }, }, }, { - name: "VPCMPUB512", + name: "VPCMPUBMasked512", auxType: auxInt8, - argLen: 2, + argLen: 3, commutative: true, asm: x86.AVPCMPUB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31646,27 +31663,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUBMasked512", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPUB, - reg: regInfo{ - inputs: []inputInfo{ - {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VGF2P8AFFINEQBMasked512", + name: "VGF2P8AFFINEINVQBMasked512", auxType: auxInt8, argLen: 3, - asm: x86.AVGF2P8AFFINEQB, + asm: x86.AVGF2P8AFFINEINVQB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -31679,10 +31679,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VGF2P8AFFINEINVQBMasked512", + name: "VGF2P8AFFINEQBMasked512", auxType: auxInt8, argLen: 3, - asm: x86.AVGF2P8AFFINEINVQB, + asm: x86.AVGF2P8AFFINEQB, reg: regInfo{ inputs: []inputInfo{ {2, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7 @@ -58852,207 +58852,202 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + 
{ + name: "AddMaskedFloat32x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "ApproximateReciprocalFloat32x16", argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalMaskedFloat32x16", + argLen: 2, + generic: true, + }, { name: "ApproximateReciprocalOfSqrtFloat32x16", argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalOfSqrtMaskedFloat32x16", + argLen: 2, + generic: true, + }, { name: "DivFloat32x16", argLen: 2, generic: true, }, + { + name: "DivMaskedFloat32x16", + argLen: 3, + generic: true, + }, { name: "EqualFloat32x16", argLen: 2, commutative: true, generic: true, }, + { + name: "EqualMaskedFloat32x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "FusedMultiplyAddFloat32x16", argLen: 3, generic: true, }, { - name: "FusedMultiplyAddSubFloat32x16", - argLen: 3, + name: "FusedMultiplyAddMaskedFloat32x16", + argLen: 4, generic: true, }, { - name: "FusedMultiplySubAddFloat32x16", + name: "FusedMultiplyAddSubFloat32x16", argLen: 3, generic: true, }, { - name: "GreaterFloat32x16", - argLen: 2, + name: "FusedMultiplyAddSubMaskedFloat32x16", + argLen: 4, generic: true, }, { - name: "GreaterEqualFloat32x16", - argLen: 2, + name: "FusedMultiplySubAddFloat32x16", + argLen: 3, generic: true, }, { - name: "IsNanFloat32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "LessFloat32x16", - argLen: 2, + name: "FusedMultiplySubAddMaskedFloat32x16", + argLen: 4, generic: true, }, { - name: "LessEqualFloat32x16", + name: "GreaterFloat32x16", argLen: 2, generic: true, }, { - name: "MaskedAddFloat32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedApproximateReciprocalFloat32x16", + name: "GreaterEqualFloat32x16", argLen: 2, generic: true, }, { - name: "MaskedApproximateReciprocalOfSqrtFloat32x16", - argLen: 2, + name: "GreaterEqualMaskedFloat32x16", + argLen: 3, generic: true, }, { - name: "MaskedDivFloat32x16", + name: "GreaterMaskedFloat32x16", argLen: 3, generic: true, }, { - name: "MaskedEqualFloat32x16", - argLen: 3, + name: "IsNanFloat32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedFusedMultiplyAddFloat32x16", - argLen: 4, - generic: true, + name: "IsNanMaskedFloat32x16", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedFusedMultiplyAddSubFloat32x16", - argLen: 4, + name: "LessFloat32x16", + argLen: 2, generic: true, }, { - name: "MaskedFusedMultiplySubAddFloat32x16", - argLen: 4, + name: "LessEqualFloat32x16", + argLen: 2, generic: true, }, { - name: "MaskedGreaterFloat32x16", + name: "LessEqualMaskedFloat32x16", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualFloat32x16", + name: "LessMaskedFloat32x16", argLen: 3, generic: true, }, { - name: "MaskedIsNanFloat32x16", - argLen: 3, + name: "MaxFloat32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedLessFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualFloat32x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedMaxFloat32x16", + name: "MaxMaskedFloat32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinFloat32x16", - argLen: 3, + name: "MinFloat32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulFloat32x16", + name: "MinMaskedFloat32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulByPowOf2Float32x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedNotEqualFloat32x16", - argLen: 3, + name: "MulFloat32x16", + argLen: 2, commutative: true, generic: 
true, }, { - name: "MaskedSqrtFloat32x16", + name: "MulByPowOf2Float32x16", argLen: 2, generic: true, }, { - name: "MaskedSubFloat32x16", + name: "MulByPowOf2MaskedFloat32x16", argLen: 3, generic: true, }, { - name: "MaxFloat32x16", - argLen: 2, + name: "MulMaskedFloat32x16", + argLen: 3, commutative: true, generic: true, }, { - name: "MinFloat32x16", + name: "NotEqualFloat32x16", argLen: 2, commutative: true, generic: true, }, { - name: "MulFloat32x16", - argLen: 2, + name: "NotEqualMaskedFloat32x16", + argLen: 3, commutative: true, generic: true, }, { - name: "MulByPowOf2Float32x16", - argLen: 2, + name: "SqrtFloat32x16", + argLen: 1, generic: true, }, { - name: "NotEqualFloat32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "SqrtFloat32x16", - argLen: 1, + name: "SqrtMaskedFloat32x16", + argLen: 2, generic: true, }, { @@ -59060,12 +59055,23 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "SubMaskedFloat32x16", + argLen: 3, + generic: true, + }, { name: "AddFloat32x4", argLen: 2, commutative: true, generic: true, }, + { + name: "AddMaskedFloat32x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AddSubFloat32x4", argLen: 2, @@ -59076,11 +59082,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalMaskedFloat32x4", + argLen: 2, + generic: true, + }, { name: "ApproximateReciprocalOfSqrtFloat32x4", argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalOfSqrtMaskedFloat32x4", + argLen: 2, + generic: true, + }, { name: "CeilFloat32x4", argLen: 1, @@ -59091,12 +59107,23 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "DivMaskedFloat32x4", + argLen: 3, + generic: true, + }, { name: "EqualFloat32x4", argLen: 2, commutative: true, generic: true, }, + { + name: "EqualMaskedFloat32x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "FloorFloat32x4", argLen: 1, @@ -59108,174 +59135,137 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "FusedMultiplyAddSubFloat32x4", - argLen: 3, + name: "FusedMultiplyAddMaskedFloat32x4", + argLen: 4, generic: true, }, { - name: "FusedMultiplySubAddFloat32x4", + name: "FusedMultiplyAddSubFloat32x4", argLen: 3, generic: true, }, { - name: "GreaterFloat32x4", - argLen: 2, + name: "FusedMultiplyAddSubMaskedFloat32x4", + argLen: 4, generic: true, }, { - name: "GreaterEqualFloat32x4", - argLen: 2, + name: "FusedMultiplySubAddFloat32x4", + argLen: 3, generic: true, }, { - name: "IsNanFloat32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "LessFloat32x4", - argLen: 2, + name: "FusedMultiplySubAddMaskedFloat32x4", + argLen: 4, generic: true, }, { - name: "LessEqualFloat32x4", + name: "GreaterFloat32x4", argLen: 2, generic: true, }, { - name: "MaskedAddFloat32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedApproximateReciprocalFloat32x4", + name: "GreaterEqualFloat32x4", argLen: 2, generic: true, }, { - name: "MaskedApproximateReciprocalOfSqrtFloat32x4", - argLen: 2, + name: "GreaterEqualMaskedFloat32x4", + argLen: 3, generic: true, }, { - name: "MaskedDivFloat32x4", + name: "GreaterMaskedFloat32x4", argLen: 3, generic: true, }, { - name: "MaskedEqualFloat32x4", - argLen: 3, + name: "IsNanFloat32x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedFusedMultiplyAddFloat32x4", - argLen: 4, - generic: true, + name: "IsNanMaskedFloat32x4", + argLen: 3, + commutative: true, + generic: true, }, { - name: 
"MaskedFusedMultiplyAddSubFloat32x4", - argLen: 4, + name: "LessFloat32x4", + argLen: 2, generic: true, }, { - name: "MaskedFusedMultiplySubAddFloat32x4", - argLen: 4, + name: "LessEqualFloat32x4", + argLen: 2, generic: true, }, { - name: "MaskedGreaterFloat32x4", + name: "LessEqualMaskedFloat32x4", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualFloat32x4", + name: "LessMaskedFloat32x4", argLen: 3, generic: true, }, { - name: "MaskedIsNanFloat32x4", - argLen: 3, + name: "MaxFloat32x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedLessFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualFloat32x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedMaxFloat32x4", + name: "MaxMaskedFloat32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinFloat32x4", - argLen: 3, + name: "MinFloat32x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulFloat32x4", + name: "MinMaskedFloat32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulByPowOf2Float32x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedNotEqualFloat32x4", - argLen: 3, + name: "MulFloat32x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedSqrtFloat32x4", + name: "MulByPowOf2Float32x4", argLen: 2, generic: true, }, { - name: "MaskedSubFloat32x4", + name: "MulByPowOf2MaskedFloat32x4", argLen: 3, generic: true, }, { - name: "MaxFloat32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinFloat32x4", - argLen: 2, + name: "MulMaskedFloat32x4", + argLen: 3, commutative: true, generic: true, }, { - name: "MulFloat32x4", + name: "NotEqualFloat32x4", argLen: 2, commutative: true, generic: true, }, { - name: "MulByPowOf2Float32x4", - argLen: 2, - generic: true, - }, - { - name: "NotEqualFloat32x4", - argLen: 2, + name: "NotEqualMaskedFloat32x4", + argLen: 3, commutative: true, generic: true, }, @@ -59299,11 +59289,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "SqrtMaskedFloat32x4", + argLen: 2, + generic: true, + }, { name: "SubFloat32x4", argLen: 2, generic: true, }, + { + name: "SubMaskedFloat32x4", + argLen: 3, + generic: true, + }, { name: "TruncFloat32x4", argLen: 1, @@ -59315,6 +59315,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddMaskedFloat32x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AddSubFloat32x8", argLen: 2, @@ -59325,11 +59331,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalMaskedFloat32x8", + argLen: 2, + generic: true, + }, { name: "ApproximateReciprocalOfSqrtFloat32x8", argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalOfSqrtMaskedFloat32x8", + argLen: 2, + generic: true, + }, { name: "CeilFloat32x8", argLen: 1, @@ -59340,12 +59356,23 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "DivMaskedFloat32x8", + argLen: 3, + generic: true, + }, { name: "EqualFloat32x8", argLen: 2, commutative: true, generic: true, }, + { + name: "EqualMaskedFloat32x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "FloorFloat32x8", argLen: 1, @@ -59357,174 +59384,137 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "FusedMultiplyAddSubFloat32x8", - argLen: 3, + name: "FusedMultiplyAddMaskedFloat32x8", + argLen: 4, generic: true, }, { - name: "FusedMultiplySubAddFloat32x8", + name: "FusedMultiplyAddSubFloat32x8", argLen: 3, generic: true, }, { - name: 
"GreaterFloat32x8", - argLen: 2, + name: "FusedMultiplyAddSubMaskedFloat32x8", + argLen: 4, generic: true, }, { - name: "GreaterEqualFloat32x8", - argLen: 2, + name: "FusedMultiplySubAddFloat32x8", + argLen: 3, generic: true, }, { - name: "IsNanFloat32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "LessFloat32x8", - argLen: 2, + name: "FusedMultiplySubAddMaskedFloat32x8", + argLen: 4, generic: true, }, { - name: "LessEqualFloat32x8", + name: "GreaterFloat32x8", argLen: 2, generic: true, }, { - name: "MaskedAddFloat32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedApproximateReciprocalFloat32x8", + name: "GreaterEqualFloat32x8", argLen: 2, generic: true, }, { - name: "MaskedApproximateReciprocalOfSqrtFloat32x8", - argLen: 2, + name: "GreaterEqualMaskedFloat32x8", + argLen: 3, generic: true, }, { - name: "MaskedDivFloat32x8", + name: "GreaterMaskedFloat32x8", argLen: 3, generic: true, }, { - name: "MaskedEqualFloat32x8", - argLen: 3, + name: "IsNanFloat32x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedFusedMultiplyAddFloat32x8", - argLen: 4, - generic: true, + name: "IsNanMaskedFloat32x8", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedFusedMultiplyAddSubFloat32x8", - argLen: 4, + name: "LessFloat32x8", + argLen: 2, generic: true, }, { - name: "MaskedFusedMultiplySubAddFloat32x8", - argLen: 4, + name: "LessEqualFloat32x8", + argLen: 2, generic: true, }, { - name: "MaskedGreaterFloat32x8", + name: "LessEqualMaskedFloat32x8", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualFloat32x8", + name: "LessMaskedFloat32x8", argLen: 3, generic: true, }, { - name: "MaskedIsNanFloat32x8", - argLen: 3, + name: "MaxFloat32x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedLessFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualFloat32x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedMaxFloat32x8", + name: "MaxMaskedFloat32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinFloat32x8", - argLen: 3, + name: "MinFloat32x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulFloat32x8", + name: "MinMaskedFloat32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulByPowOf2Float32x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedNotEqualFloat32x8", - argLen: 3, + name: "MulFloat32x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedSqrtFloat32x8", + name: "MulByPowOf2Float32x8", argLen: 2, generic: true, }, { - name: "MaskedSubFloat32x8", + name: "MulByPowOf2MaskedFloat32x8", argLen: 3, generic: true, }, { - name: "MaxFloat32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinFloat32x8", - argLen: 2, + name: "MulMaskedFloat32x8", + argLen: 3, commutative: true, generic: true, }, { - name: "MulFloat32x8", + name: "NotEqualFloat32x8", argLen: 2, commutative: true, generic: true, }, { - name: "MulByPowOf2Float32x8", - argLen: 2, - generic: true, - }, - { - name: "NotEqualFloat32x8", - argLen: 2, + name: "NotEqualMaskedFloat32x8", + argLen: 3, commutative: true, generic: true, }, @@ -59548,11 +59538,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "SqrtMaskedFloat32x8", + argLen: 2, + generic: true, + }, { name: "SubFloat32x8", argLen: 2, generic: true, }, + { + name: "SubMaskedFloat32x8", + argLen: 3, + generic: true, + }, { name: "TruncFloat32x8", argLen: 1, @@ -59564,6 +59564,12 @@ var opcodeTable = 
[...]opInfo{ commutative: true, generic: true, }, + { + name: "AddMaskedFloat64x2", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AddSubFloat64x2", argLen: 2, @@ -59574,19 +59580,34 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalMaskedFloat64x2", + argLen: 2, + generic: true, + }, { name: "ApproximateReciprocalOfSqrtFloat64x2", argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalOfSqrtMaskedFloat64x2", + argLen: 2, + generic: true, + }, { name: "CeilFloat64x2", argLen: 1, generic: true, }, { - name: "DivFloat64x2", - argLen: 2, + name: "DivFloat64x2", + argLen: 2, + generic: true, + }, + { + name: "DivMaskedFloat64x2", + argLen: 3, generic: true, }, { @@ -59601,6 +59622,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "EqualMaskedFloat64x2", + argLen: 3, + commutative: true, + generic: true, + }, { name: "FloorFloat64x2", argLen: 1, @@ -59612,174 +59639,137 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "FusedMultiplyAddSubFloat64x2", - argLen: 3, + name: "FusedMultiplyAddMaskedFloat64x2", + argLen: 4, generic: true, }, { - name: "FusedMultiplySubAddFloat64x2", + name: "FusedMultiplyAddSubFloat64x2", argLen: 3, generic: true, }, { - name: "GreaterFloat64x2", - argLen: 2, + name: "FusedMultiplyAddSubMaskedFloat64x2", + argLen: 4, generic: true, }, { - name: "GreaterEqualFloat64x2", - argLen: 2, + name: "FusedMultiplySubAddFloat64x2", + argLen: 3, generic: true, }, { - name: "IsNanFloat64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "LessFloat64x2", - argLen: 2, + name: "FusedMultiplySubAddMaskedFloat64x2", + argLen: 4, generic: true, }, { - name: "LessEqualFloat64x2", + name: "GreaterFloat64x2", argLen: 2, generic: true, }, { - name: "MaskedAddFloat64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedApproximateReciprocalFloat64x2", + name: "GreaterEqualFloat64x2", argLen: 2, generic: true, }, { - name: "MaskedApproximateReciprocalOfSqrtFloat64x2", - argLen: 2, + name: "GreaterEqualMaskedFloat64x2", + argLen: 3, generic: true, }, { - name: "MaskedDivFloat64x2", + name: "GreaterMaskedFloat64x2", argLen: 3, generic: true, }, { - name: "MaskedEqualFloat64x2", - argLen: 3, + name: "IsNanFloat64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedFusedMultiplyAddFloat64x2", - argLen: 4, - generic: true, + name: "IsNanMaskedFloat64x2", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedFusedMultiplyAddSubFloat64x2", - argLen: 4, + name: "LessFloat64x2", + argLen: 2, generic: true, }, { - name: "MaskedFusedMultiplySubAddFloat64x2", - argLen: 4, + name: "LessEqualFloat64x2", + argLen: 2, generic: true, }, { - name: "MaskedGreaterFloat64x2", + name: "LessEqualMaskedFloat64x2", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualFloat64x2", + name: "LessMaskedFloat64x2", argLen: 3, generic: true, }, { - name: "MaskedIsNanFloat64x2", - argLen: 3, + name: "MaxFloat64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedLessFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualFloat64x2", - argLen: 3, - generic: true, - }, - { - name: "MaskedMaxFloat64x2", + name: "MaxMaskedFloat64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinFloat64x2", - argLen: 3, + name: "MinFloat64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulFloat64x2", + name: "MinMaskedFloat64x2", argLen: 3, commutative: 
true, generic: true, }, { - name: "MaskedMulByPowOf2Float64x2", - argLen: 3, - generic: true, - }, - { - name: "MaskedNotEqualFloat64x2", - argLen: 3, + name: "MulFloat64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedSqrtFloat64x2", + name: "MulByPowOf2Float64x2", argLen: 2, generic: true, }, { - name: "MaskedSubFloat64x2", + name: "MulByPowOf2MaskedFloat64x2", argLen: 3, generic: true, }, { - name: "MaxFloat64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinFloat64x2", - argLen: 2, + name: "MulMaskedFloat64x2", + argLen: 3, commutative: true, generic: true, }, { - name: "MulFloat64x2", + name: "NotEqualFloat64x2", argLen: 2, commutative: true, generic: true, }, { - name: "MulByPowOf2Float64x2", - argLen: 2, - generic: true, - }, - { - name: "NotEqualFloat64x2", - argLen: 2, + name: "NotEqualMaskedFloat64x2", + argLen: 3, commutative: true, generic: true, }, @@ -59803,11 +59793,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "SqrtMaskedFloat64x2", + argLen: 2, + generic: true, + }, { name: "SubFloat64x2", argLen: 2, generic: true, }, + { + name: "SubMaskedFloat64x2", + argLen: 3, + generic: true, + }, { name: "TruncFloat64x2", argLen: 1, @@ -59819,6 +59819,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddMaskedFloat64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AddSubFloat64x4", argLen: 2, @@ -59829,11 +59835,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalMaskedFloat64x4", + argLen: 2, + generic: true, + }, { name: "ApproximateReciprocalOfSqrtFloat64x4", argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalOfSqrtMaskedFloat64x4", + argLen: 2, + generic: true, + }, { name: "CeilFloat64x4", argLen: 1, @@ -59844,12 +59860,23 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "DivMaskedFloat64x4", + argLen: 3, + generic: true, + }, { name: "EqualFloat64x4", argLen: 2, commutative: true, generic: true, }, + { + name: "EqualMaskedFloat64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "FloorFloat64x4", argLen: 1, @@ -59861,174 +59888,137 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "FusedMultiplyAddSubFloat64x4", - argLen: 3, + name: "FusedMultiplyAddMaskedFloat64x4", + argLen: 4, generic: true, }, { - name: "FusedMultiplySubAddFloat64x4", + name: "FusedMultiplyAddSubFloat64x4", argLen: 3, generic: true, }, { - name: "GreaterFloat64x4", - argLen: 2, + name: "FusedMultiplyAddSubMaskedFloat64x4", + argLen: 4, generic: true, }, { - name: "GreaterEqualFloat64x4", - argLen: 2, + name: "FusedMultiplySubAddFloat64x4", + argLen: 3, generic: true, }, { - name: "IsNanFloat64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "LessFloat64x4", - argLen: 2, + name: "FusedMultiplySubAddMaskedFloat64x4", + argLen: 4, generic: true, }, { - name: "LessEqualFloat64x4", + name: "GreaterFloat64x4", argLen: 2, generic: true, }, { - name: "MaskedAddFloat64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedApproximateReciprocalFloat64x4", + name: "GreaterEqualFloat64x4", argLen: 2, generic: true, }, { - name: "MaskedApproximateReciprocalOfSqrtFloat64x4", - argLen: 2, + name: "GreaterEqualMaskedFloat64x4", + argLen: 3, generic: true, }, { - name: "MaskedDivFloat64x4", + name: "GreaterMaskedFloat64x4", argLen: 3, generic: true, }, { - name: "MaskedEqualFloat64x4", - argLen: 3, + name: "IsNanFloat64x4", 
+ argLen: 2, commutative: true, generic: true, }, { - name: "MaskedFusedMultiplyAddFloat64x4", - argLen: 4, - generic: true, + name: "IsNanMaskedFloat64x4", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedFusedMultiplyAddSubFloat64x4", - argLen: 4, + name: "LessFloat64x4", + argLen: 2, generic: true, }, { - name: "MaskedFusedMultiplySubAddFloat64x4", - argLen: 4, + name: "LessEqualFloat64x4", + argLen: 2, generic: true, }, { - name: "MaskedGreaterFloat64x4", + name: "LessEqualMaskedFloat64x4", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualFloat64x4", + name: "LessMaskedFloat64x4", argLen: 3, generic: true, }, { - name: "MaskedIsNanFloat64x4", - argLen: 3, + name: "MaxFloat64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedLessFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualFloat64x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedMaxFloat64x4", + name: "MaxMaskedFloat64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinFloat64x4", - argLen: 3, + name: "MinFloat64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulFloat64x4", + name: "MinMaskedFloat64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulByPowOf2Float64x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedNotEqualFloat64x4", - argLen: 3, + name: "MulFloat64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedSqrtFloat64x4", + name: "MulByPowOf2Float64x4", argLen: 2, generic: true, }, { - name: "MaskedSubFloat64x4", + name: "MulByPowOf2MaskedFloat64x4", argLen: 3, generic: true, }, { - name: "MaxFloat64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinFloat64x4", - argLen: 2, + name: "MulMaskedFloat64x4", + argLen: 3, commutative: true, generic: true, }, { - name: "MulFloat64x4", + name: "NotEqualFloat64x4", argLen: 2, commutative: true, generic: true, }, { - name: "MulByPowOf2Float64x4", - argLen: 2, - generic: true, - }, - { - name: "NotEqualFloat64x4", - argLen: 2, + name: "NotEqualMaskedFloat64x4", + argLen: 3, commutative: true, generic: true, }, @@ -60052,11 +60042,21 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "SqrtMaskedFloat64x4", + argLen: 2, + generic: true, + }, { name: "SubFloat64x4", argLen: 2, generic: true, }, + { + name: "SubMaskedFloat64x4", + argLen: 3, + generic: true, + }, { name: "TruncFloat64x4", argLen: 1, @@ -60068,207 +60068,202 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddMaskedFloat64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "ApproximateReciprocalFloat64x8", argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalMaskedFloat64x8", + argLen: 2, + generic: true, + }, { name: "ApproximateReciprocalOfSqrtFloat64x8", argLen: 1, generic: true, }, + { + name: "ApproximateReciprocalOfSqrtMaskedFloat64x8", + argLen: 2, + generic: true, + }, { name: "DivFloat64x8", argLen: 2, generic: true, }, + { + name: "DivMaskedFloat64x8", + argLen: 3, + generic: true, + }, { name: "EqualFloat64x8", argLen: 2, commutative: true, generic: true, }, + { + name: "EqualMaskedFloat64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "FusedMultiplyAddFloat64x8", argLen: 3, generic: true, }, { - name: "FusedMultiplyAddSubFloat64x8", - argLen: 3, + name: "FusedMultiplyAddMaskedFloat64x8", + argLen: 4, generic: true, }, { - name: "FusedMultiplySubAddFloat64x8", + name: "FusedMultiplyAddSubFloat64x8", 
argLen: 3, generic: true, }, { - name: "GreaterFloat64x8", - argLen: 2, + name: "FusedMultiplyAddSubMaskedFloat64x8", + argLen: 4, generic: true, }, { - name: "GreaterEqualFloat64x8", - argLen: 2, + name: "FusedMultiplySubAddFloat64x8", + argLen: 3, generic: true, }, { - name: "IsNanFloat64x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "LessFloat64x8", - argLen: 2, + name: "FusedMultiplySubAddMaskedFloat64x8", + argLen: 4, generic: true, }, { - name: "LessEqualFloat64x8", + name: "GreaterFloat64x8", argLen: 2, generic: true, }, { - name: "MaskedAddFloat64x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedApproximateReciprocalFloat64x8", + name: "GreaterEqualFloat64x8", argLen: 2, generic: true, }, { - name: "MaskedApproximateReciprocalOfSqrtFloat64x8", - argLen: 2, + name: "GreaterEqualMaskedFloat64x8", + argLen: 3, generic: true, }, { - name: "MaskedDivFloat64x8", + name: "GreaterMaskedFloat64x8", argLen: 3, generic: true, }, { - name: "MaskedEqualFloat64x8", - argLen: 3, + name: "IsNanFloat64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedFusedMultiplyAddFloat64x8", - argLen: 4, - generic: true, + name: "IsNanMaskedFloat64x8", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedFusedMultiplyAddSubFloat64x8", - argLen: 4, + name: "LessFloat64x8", + argLen: 2, generic: true, }, { - name: "MaskedFusedMultiplySubAddFloat64x8", - argLen: 4, + name: "LessEqualFloat64x8", + argLen: 2, generic: true, }, { - name: "MaskedGreaterFloat64x8", + name: "LessEqualMaskedFloat64x8", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualFloat64x8", + name: "LessMaskedFloat64x8", argLen: 3, generic: true, }, { - name: "MaskedIsNanFloat64x8", - argLen: 3, + name: "MaxFloat64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedLessFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualFloat64x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedMaxFloat64x8", + name: "MaxMaskedFloat64x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinFloat64x8", - argLen: 3, + name: "MinFloat64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulFloat64x8", + name: "MinMaskedFloat64x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulByPowOf2Float64x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedNotEqualFloat64x8", - argLen: 3, + name: "MulFloat64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedSqrtFloat64x8", + name: "MulByPowOf2Float64x8", argLen: 2, generic: true, }, { - name: "MaskedSubFloat64x8", + name: "MulByPowOf2MaskedFloat64x8", argLen: 3, generic: true, }, { - name: "MaxFloat64x8", - argLen: 2, + name: "MulMaskedFloat64x8", + argLen: 3, commutative: true, generic: true, }, { - name: "MinFloat64x8", + name: "NotEqualFloat64x8", argLen: 2, commutative: true, generic: true, }, { - name: "MulFloat64x8", - argLen: 2, + name: "NotEqualMaskedFloat64x8", + argLen: 3, commutative: true, generic: true, }, { - name: "MulByPowOf2Float64x8", - argLen: 2, + name: "SqrtFloat64x8", + argLen: 1, generic: true, }, { - name: "NotEqualFloat64x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "SqrtFloat64x8", - argLen: 1, + name: "SqrtMaskedFloat64x8", + argLen: 2, generic: true, }, { @@ -60276,17 +60271,33 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "SubMaskedFloat64x8", + argLen: 3, + generic: true, + }, { name: "AbsoluteInt16x16", argLen: 
1, generic: true, }, + { + name: "AbsoluteMaskedInt16x16", + argLen: 2, + generic: true, + }, { name: "AddInt16x16", argLen: 2, commutative: true, generic: true, }, + { + name: "AddMaskedInt16x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AndInt16x16", argLen: 2, @@ -60304,6 +60315,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "EqualMaskedInt16x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "GreaterInt16x16", argLen: 2, @@ -60315,160 +60332,92 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "LessInt16x16", - argLen: 2, + name: "GreaterEqualMaskedInt16x16", + argLen: 3, generic: true, }, { - name: "LessEqualInt16x16", - argLen: 2, + name: "GreaterMaskedInt16x16", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt16x16", + name: "LessInt16x16", argLen: 2, generic: true, }, { - name: "MaskedAddInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedEqualInt16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedGreaterInt16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt16x16", - argLen: 3, + name: "LessEqualInt16x16", + argLen: 2, generic: true, }, { - name: "MaskedLessInt16x16", + name: "LessEqualMaskedInt16x16", argLen: 3, generic: true, }, { - name: "MaskedLessEqualInt16x16", + name: "LessMaskedInt16x16", argLen: 3, generic: true, }, { - name: "MaskedMaxInt16x16", - argLen: 3, + name: "MaxInt16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinInt16x16", + name: "MaxMaskedInt16x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulHighInt16x16", - argLen: 3, + name: "MinInt16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulLowInt16x16", + name: "MinMaskedInt16x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt16x16", - argLen: 3, + name: "MulHighInt16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedPairDotProdInt16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedPopCountInt16x16", - argLen: 2, - generic: true, - }, - { - name: "MaskedSaturatedAddInt16x16", + name: "MulHighMaskedInt16x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedSaturatedSubInt16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedShiftLeftInt16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedShiftLeftAndFillUpperFromInt16x16", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightInt16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedShiftRightAndFillUpperFromInt16x16", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightSignExtendedInt16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedSubInt16x16", - argLen: 3, - generic: true, - }, - { - name: "MaxInt16x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinInt16x16", + name: "MulLowInt16x16", argLen: 2, commutative: true, generic: true, }, { - name: "MulHighInt16x16", - argLen: 2, + name: "MulLowMaskedInt16x16", + argLen: 3, commutative: true, generic: true, }, { - name: "MulLowInt16x16", + name: "NotEqualInt16x16", argLen: 2, commutative: true, generic: true, }, { - name: "NotEqualInt16x16", - argLen: 2, + name: "NotEqualMaskedInt16x16", + argLen: 3, commutative: true, generic: true, }, @@ -60483,6 +60432,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "PairDotProdMaskedInt16x16", + argLen: 3, + generic: 
true, + }, { name: "PairwiseAddInt16x16", argLen: 2, @@ -60498,12 +60452,23 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedInt16x16", + argLen: 2, + generic: true, + }, { name: "SaturatedAddInt16x16", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedInt16x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedPairwiseAddInt16x16", argLen: 2, @@ -60519,6 +60484,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedInt16x16", + argLen: 3, + generic: true, + }, { name: "ShiftAllLeftInt16x16", argLen: 2, @@ -60545,188 +60515,137 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightInt16x16", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedInt16x16", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt16x16", + name: "ShiftLeftMaskedInt16x16", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedInt16x16", - argLen: 2, - generic: true, - }, - { - name: "SignInt16x16", - argLen: 2, - generic: true, - }, - { - name: "SubInt16x16", - argLen: 2, - generic: true, - }, - { - name: "XorInt16x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AbsoluteInt16x32", - argLen: 1, - generic: true, - }, - { - name: "AddInt16x32", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "EqualInt16x32", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "GreaterInt16x32", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt16x32", - argLen: 2, - generic: true, - }, - { - name: "LessInt16x32", + name: "ShiftRightInt16x16", argLen: 2, generic: true, }, { - name: "LessEqualInt16x32", - argLen: 2, + name: "ShiftRightAndFillUpperFromInt16x16", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt16x32", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedInt16x16", + argLen: 4, generic: true, }, - { - name: "MaskedAddInt16x32", - argLen: 3, - commutative: true, - generic: true, + { + name: "ShiftRightMaskedInt16x16", + argLen: 3, + generic: true, }, { - name: "MaskedEqualInt16x32", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightSignExtendedInt16x16", + argLen: 2, + generic: true, }, { - name: "MaskedGreaterInt16x32", + name: "ShiftRightSignExtendedMaskedInt16x16", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualInt16x32", - argLen: 3, + name: "SignInt16x16", + argLen: 2, generic: true, }, { - name: "MaskedLessInt16x32", - argLen: 3, + name: "SubInt16x16", + argLen: 2, generic: true, }, { - name: "MaskedLessEqualInt16x32", + name: "SubMaskedInt16x16", argLen: 3, generic: true, }, { - name: "MaskedMaxInt16x32", - argLen: 3, + name: "XorInt16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinInt16x32", - argLen: 3, + name: "AbsoluteInt16x32", + argLen: 1, + generic: true, + }, + { + name: "AbsoluteMaskedInt16x32", + argLen: 2, + generic: true, + }, + { + name: "AddInt16x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulHighInt16x32", + name: "AddMaskedInt16x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulLowInt16x32", - argLen: 3, + name: "EqualInt16x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt16x32", + name: "EqualMaskedInt16x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPairDotProdInt16x32", - argLen: 3, + name: "GreaterInt16x32", + argLen: 2, generic: true, }, { - name: 
"MaskedPopCountInt16x32", + name: "GreaterEqualInt16x32", argLen: 2, generic: true, }, { - name: "MaskedSaturatedAddInt16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedSaturatedSubInt16x32", + name: "GreaterEqualMaskedInt16x32", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftInt16x32", + name: "GreaterMaskedInt16x32", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromInt16x32", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightInt16x32", - argLen: 3, + name: "LessInt16x32", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromInt16x32", - argLen: 4, + name: "LessEqualInt16x32", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedInt16x32", + name: "LessEqualMaskedInt16x32", argLen: 3, generic: true, }, { - name: "MaskedSubInt16x32", + name: "LessMaskedInt16x32", argLen: 3, generic: true, }, @@ -60736,51 +60655,102 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MaxMaskedInt16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MinInt16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "MinMaskedInt16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulHighInt16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "MulHighMaskedInt16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulLowInt16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "MulLowMaskedInt16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualInt16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedInt16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PairDotProdInt16x32", argLen: 2, generic: true, }, + { + name: "PairDotProdMaskedInt16x32", + argLen: 3, + generic: true, + }, { name: "PopCountInt16x32", argLen: 1, generic: true, }, + { + name: "PopCountMaskedInt16x32", + argLen: 2, + generic: true, + }, { name: "SaturatedAddInt16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedInt16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedSubInt16x32", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedInt16x32", + argLen: 3, + generic: true, + }, { name: "ShiftLeftInt16x32", argLen: 2, @@ -60791,6 +60761,16 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftLeftAndFillUpperFromMaskedInt16x32", + argLen: 4, + generic: true, + }, + { + name: "ShiftLeftMaskedInt16x32", + argLen: 3, + generic: true, + }, { name: "ShiftRightInt16x32", argLen: 2, @@ -60801,27 +60781,58 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftRightAndFillUpperFromMaskedInt16x32", + argLen: 4, + generic: true, + }, + { + name: "ShiftRightMaskedInt16x32", + argLen: 3, + generic: true, + }, { name: "ShiftRightSignExtendedInt16x32", argLen: 2, generic: true, }, + { + name: "ShiftRightSignExtendedMaskedInt16x32", + argLen: 3, + generic: true, + }, { name: "SubInt16x32", argLen: 2, generic: true, }, + { + name: "SubMaskedInt16x32", + argLen: 3, + generic: true, + }, { name: "AbsoluteInt16x8", argLen: 1, generic: true, }, + { + name: "AbsoluteMaskedInt16x8", + argLen: 2, + generic: true, + }, { name: "AddInt16x8", argLen: 2, commutative: true, generic: true, }, + { + name: "AddMaskedInt16x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AndInt16x8", argLen: 2, @@ -60839,6 +60850,12 @@ 
var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "EqualMaskedInt16x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "GreaterInt16x8", argLen: 2, @@ -60850,160 +60867,92 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "LessInt16x8", - argLen: 2, + name: "GreaterEqualMaskedInt16x8", + argLen: 3, generic: true, }, { - name: "LessEqualInt16x8", - argLen: 2, + name: "GreaterMaskedInt16x8", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt16x8", + name: "LessInt16x8", argLen: 2, generic: true, }, { - name: "MaskedAddInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedEqualInt16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedGreaterInt16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt16x8", - argLen: 3, + name: "LessEqualInt16x8", + argLen: 2, generic: true, }, { - name: "MaskedLessInt16x8", + name: "LessEqualMaskedInt16x8", argLen: 3, generic: true, }, { - name: "MaskedLessEqualInt16x8", + name: "LessMaskedInt16x8", argLen: 3, generic: true, }, { - name: "MaskedMaxInt16x8", - argLen: 3, + name: "MaxInt16x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinInt16x8", + name: "MaxMaskedInt16x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulHighInt16x8", - argLen: 3, + name: "MinInt16x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulLowInt16x8", + name: "MinMaskedInt16x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt16x8", - argLen: 3, + name: "MulHighInt16x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedPairDotProdInt16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedPopCountInt16x8", - argLen: 2, - generic: true, - }, - { - name: "MaskedSaturatedAddInt16x8", + name: "MulHighMaskedInt16x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedSaturatedSubInt16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedShiftLeftInt16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedShiftLeftAndFillUpperFromInt16x8", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightInt16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedShiftRightAndFillUpperFromInt16x8", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightSignExtendedInt16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedSubInt16x8", - argLen: 3, - generic: true, - }, - { - name: "MaxInt16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinInt16x8", + name: "MulLowInt16x8", argLen: 2, commutative: true, generic: true, }, { - name: "MulHighInt16x8", - argLen: 2, + name: "MulLowMaskedInt16x8", + argLen: 3, commutative: true, generic: true, }, { - name: "MulLowInt16x8", + name: "NotEqualInt16x8", argLen: 2, commutative: true, generic: true, }, { - name: "NotEqualInt16x8", - argLen: 2, + name: "NotEqualMaskedInt16x8", + argLen: 3, commutative: true, generic: true, }, @@ -61018,6 +60967,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "PairDotProdMaskedInt16x8", + argLen: 3, + generic: true, + }, { name: "PairwiseAddInt16x8", argLen: 2, @@ -61033,12 +60987,23 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedInt16x8", + argLen: 2, + generic: true, + }, { name: "SaturatedAddInt16x8", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedInt16x8", + argLen: 3, + commutative: true, + 
generic: true, + }, { name: "SaturatedPairwiseAddInt16x8", argLen: 2, @@ -61054,6 +61019,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedInt16x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllLeftInt16x8", argLen: 2, @@ -61080,254 +61050,207 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightInt16x8", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedInt16x8", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt16x8", + name: "ShiftLeftMaskedInt16x8", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedInt16x8", + name: "ShiftRightInt16x8", argLen: 2, generic: true, }, { - name: "SignInt16x8", - argLen: 2, + name: "ShiftRightAndFillUpperFromInt16x8", + argLen: 3, generic: true, }, { - name: "SubInt16x8", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedInt16x8", + argLen: 4, generic: true, }, { - name: "XorInt16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AbsoluteInt32x16", - argLen: 1, + name: "ShiftRightMaskedInt16x8", + argLen: 3, generic: true, }, { - name: "AddInt32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndInt32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndNotInt32x16", + name: "ShiftRightSignExtendedInt16x8", argLen: 2, generic: true, }, { - name: "EqualInt32x16", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightSignExtendedMaskedInt16x8", + argLen: 3, + generic: true, }, { - name: "GreaterInt32x16", + name: "SignInt16x8", argLen: 2, generic: true, }, { - name: "GreaterEqualInt32x16", + name: "SubInt16x8", argLen: 2, generic: true, }, { - name: "LessInt32x16", - argLen: 2, + name: "SubMaskedInt16x8", + argLen: 3, generic: true, }, { - name: "LessEqualInt32x16", - argLen: 2, + name: "XorInt16x8", + argLen: 2, + commutative: true, + generic: true, + }, + { + name: "AbsoluteInt32x16", + argLen: 1, generic: true, }, { - name: "MaskedAbsoluteInt32x16", + name: "AbsoluteMaskedInt32x16", argLen: 2, generic: true, }, { - name: "MaskedAddInt32x16", - argLen: 3, + name: "AddInt32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAndInt32x16", + name: "AddMaskedInt32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedAndNotInt32x16", - argLen: 3, - generic: true, + name: "AndInt32x16", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedEqualInt32x16", + name: "AndMaskedInt32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterInt32x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt32x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessInt32x16", - argLen: 3, + name: "AndNotInt32x16", + argLen: 2, generic: true, }, { - name: "MaskedLessEqualInt32x16", + name: "AndNotMaskedInt32x16", argLen: 3, generic: true, }, { - name: "MaskedMaxInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedMinInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedMulLowInt32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedNotEqualInt32x16", - argLen: 3, + name: "EqualInt32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrInt32x16", + name: "EqualMaskedInt32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPairDotProdAccumulateInt32x16", - argLen: 4, + name: "GreaterInt32x16", + argLen: 2, generic: true, }, { - name: 
"MaskedPopCountInt32x16", + name: "GreaterEqualInt32x16", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftInt32x16", + name: "GreaterEqualMaskedInt32x16", argLen: 3, generic: true, }, { - name: "MaskedRotateRightInt32x16", + name: "GreaterMaskedInt32x16", argLen: 3, generic: true, }, { - name: "MaskedSaturatedPairDotProdAccumulateInt32x16", - argLen: 4, + name: "LessInt32x16", + argLen: 2, generic: true, }, { - name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", - argLen: 4, + name: "LessEqualInt32x16", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftInt32x16", + name: "LessEqualMaskedInt32x16", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromInt32x16", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightInt32x16", + name: "LessMaskedInt32x16", argLen: 3, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromInt32x16", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightSignExtendedInt32x16", - argLen: 3, - generic: true, + name: "MaxInt32x16", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedSubInt32x16", - argLen: 3, - generic: true, + name: "MaxMaskedInt32x16", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x16", - argLen: 4, - generic: true, + name: "MinInt32x16", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedXorInt32x16", + name: "MinMaskedInt32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaxInt32x16", + name: "MulLowInt32x16", argLen: 2, commutative: true, generic: true, }, { - name: "MinInt32x16", - argLen: 2, + name: "MulLowMaskedInt32x16", + argLen: 3, commutative: true, generic: true, }, { - name: "MulLowInt32x16", + name: "NotEqualInt32x16", argLen: 2, commutative: true, generic: true, }, { - name: "NotEqualInt32x16", - argLen: 2, + name: "NotEqualMaskedInt32x16", + argLen: 3, commutative: true, generic: true, }, @@ -61337,283 +61260,271 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "OrMaskedInt32x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PairDotProdAccumulateInt32x16", argLen: 3, generic: true, }, + { + name: "PairDotProdAccumulateMaskedInt32x16", + argLen: 4, + generic: true, + }, { name: "PopCountInt32x16", argLen: 1, generic: true, }, { - name: "RotateLeftInt32x16", + name: "PopCountMaskedInt32x16", argLen: 2, generic: true, }, { - name: "RotateRightInt32x16", + name: "RotateLeftInt32x16", argLen: 2, generic: true, }, { - name: "SaturatedPairDotProdAccumulateInt32x16", + name: "RotateLeftMaskedInt32x16", argLen: 3, generic: true, }, { - name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", - argLen: 3, + name: "RotateRightInt32x16", + argLen: 2, generic: true, }, { - name: "ShiftLeftInt32x16", - argLen: 2, + name: "RotateRightMaskedInt32x16", + argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt32x16", + name: "SaturatedPairDotProdAccumulateInt32x16", argLen: 3, generic: true, }, { - name: "ShiftRightInt32x16", - argLen: 2, + name: "SaturatedPairDotProdAccumulateMaskedInt32x16", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt32x16", + name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedInt32x16", - argLen: 2, + name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", + argLen: 4, generic: true, }, { - name: "SubInt32x16", + name: "ShiftLeftInt32x16", argLen: 2, 
generic: true, }, { - name: "UnsignedSignedQuadDotProdAccumulateInt32x16", + name: "ShiftLeftAndFillUpperFromInt32x16", argLen: 3, generic: true, }, { - name: "XorInt32x16", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftLeftAndFillUpperFromMaskedInt32x16", + argLen: 4, + generic: true, }, { - name: "AbsoluteInt32x4", - argLen: 1, + name: "ShiftLeftMaskedInt32x16", + argLen: 3, generic: true, }, { - name: "AddInt32x4", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightInt32x16", + argLen: 2, + generic: true, }, { - name: "AndInt32x4", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromInt32x16", + argLen: 3, + generic: true, }, { - name: "AndNotInt32x4", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedInt32x16", + argLen: 4, generic: true, }, { - name: "EqualInt32x4", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightMaskedInt32x16", + argLen: 3, + generic: true, }, { - name: "GreaterInt32x4", + name: "ShiftRightSignExtendedInt32x16", argLen: 2, generic: true, }, { - name: "GreaterEqualInt32x4", - argLen: 2, + name: "ShiftRightSignExtendedMaskedInt32x16", + argLen: 3, generic: true, }, { - name: "LessInt32x4", + name: "SubInt32x16", argLen: 2, generic: true, }, { - name: "LessEqualInt32x4", - argLen: 2, + name: "SubMaskedInt32x16", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt32x4", - argLen: 2, + name: "UnsignedSignedQuadDotProdAccumulateInt32x16", + argLen: 3, generic: true, }, { - name: "MaskedAddInt32x4", - argLen: 3, - commutative: true, - generic: true, + name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x16", + argLen: 4, + generic: true, }, { - name: "MaskedAndInt32x4", - argLen: 3, + name: "XorInt32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAndNotInt32x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedEqualInt32x4", + name: "XorMaskedInt32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterInt32x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt32x4", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessInt32x4", - argLen: 3, + name: "AbsoluteInt32x4", + argLen: 1, generic: true, }, { - name: "MaskedLessEqualInt32x4", - argLen: 3, + name: "AbsoluteMaskedInt32x4", + argLen: 2, generic: true, }, { - name: "MaskedMaxInt32x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedMinInt32x4", - argLen: 3, + name: "AddInt32x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulLowInt32x4", + name: "AddMaskedInt32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt32x4", - argLen: 3, + name: "AndInt32x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrInt32x4", + name: "AndMaskedInt32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPairDotProdAccumulateInt32x4", - argLen: 4, - generic: true, - }, - { - name: "MaskedPopCountInt32x4", + name: "AndNotInt32x4", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftInt32x4", + name: "AndNotMaskedInt32x4", argLen: 3, generic: true, }, { - name: "MaskedRotateRightInt32x4", - argLen: 3, - generic: true, + name: "EqualInt32x4", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedSaturatedPairDotProdAccumulateInt32x4", - argLen: 4, + name: "EqualMaskedInt32x4", + argLen: 3, + commutative: true, + generic: true, + }, + { + name: "GreaterInt32x4", + argLen: 2, generic: true, }, { - name: 
"MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", - argLen: 4, + name: "GreaterEqualInt32x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftInt32x4", + name: "GreaterEqualMaskedInt32x4", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromInt32x4", - argLen: 4, + name: "GreaterMaskedInt32x4", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightInt32x4", - argLen: 3, + name: "LessInt32x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromInt32x4", - argLen: 4, + name: "LessEqualInt32x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedInt32x4", + name: "LessEqualMaskedInt32x4", argLen: 3, generic: true, }, { - name: "MaskedSubInt32x4", + name: "LessMaskedInt32x4", argLen: 3, generic: true, }, { - name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x4", - argLen: 4, - generic: true, + name: "MaxInt32x4", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedXorInt32x4", + name: "MaxMaskedInt32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaxInt32x4", + name: "MinInt32x4", argLen: 2, commutative: true, generic: true, }, { - name: "MinInt32x4", - argLen: 2, + name: "MinMaskedInt32x4", + argLen: 3, commutative: true, generic: true, }, @@ -61629,23 +61540,46 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MulLowMaskedInt32x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualInt32x4", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedInt32x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrInt32x4", argLen: 2, commutative: true, generic: true, }, + { + name: "OrMaskedInt32x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PairDotProdAccumulateInt32x4", argLen: 3, generic: true, }, + { + name: "PairDotProdAccumulateMaskedInt32x4", + argLen: 4, + generic: true, + }, { name: "PairwiseAddInt32x4", argLen: 2, @@ -61661,26 +61595,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedInt32x4", + argLen: 2, + generic: true, + }, { name: "RotateLeftInt32x4", argLen: 2, generic: true, }, + { + name: "RotateLeftMaskedInt32x4", + argLen: 3, + generic: true, + }, { name: "RotateRightInt32x4", argLen: 2, generic: true, }, + { + name: "RotateRightMaskedInt32x4", + argLen: 3, + generic: true, + }, { name: "SaturatedPairDotProdAccumulateInt32x4", argLen: 3, generic: true, }, + { + name: "SaturatedPairDotProdAccumulateMaskedInt32x4", + argLen: 4, + generic: true, + }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLen: 3, generic: true, }, + { + name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", + argLen: 4, + generic: true, + }, { name: "ShiftAllLeftInt32x4", argLen: 2, @@ -61707,247 +61666,199 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightInt32x4", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedInt32x4", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt32x4", + name: "ShiftLeftMaskedInt32x4", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedInt32x4", + name: "ShiftRightInt32x4", argLen: 2, generic: true, }, { - name: "SignInt32x4", - argLen: 2, + name: "ShiftRightAndFillUpperFromInt32x4", + argLen: 3, generic: true, }, { - name: "SubInt32x4", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedInt32x4", + argLen: 4, generic: true, }, { - name: "UnsignedSignedQuadDotProdAccumulateInt32x4", + name: 
"ShiftRightMaskedInt32x4", argLen: 3, generic: true, }, { - name: "XorInt32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AbsoluteInt32x8", - argLen: 1, - generic: true, - }, - { - name: "AddInt32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndInt32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndNotInt32x8", + name: "ShiftRightSignExtendedInt32x4", argLen: 2, generic: true, }, { - name: "EqualInt32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "GreaterInt32x8", - argLen: 2, + name: "ShiftRightSignExtendedMaskedInt32x4", + argLen: 3, generic: true, }, { - name: "GreaterEqualInt32x8", + name: "SignInt32x4", argLen: 2, generic: true, }, { - name: "LessInt32x8", + name: "SubInt32x4", argLen: 2, generic: true, }, { - name: "LessEqualInt32x8", - argLen: 2, + name: "SubMaskedInt32x4", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt32x8", - argLen: 2, + name: "UnsignedSignedQuadDotProdAccumulateInt32x4", + argLen: 3, generic: true, }, { - name: "MaskedAddInt32x8", - argLen: 3, - commutative: true, - generic: true, + name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x4", + argLen: 4, + generic: true, }, { - name: "MaskedAndInt32x8", - argLen: 3, + name: "XorInt32x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAndNotInt32x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedEqualInt32x8", + name: "XorMaskedInt32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterInt32x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt32x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessInt32x8", - argLen: 3, + name: "AbsoluteInt32x8", + argLen: 1, generic: true, }, { - name: "MaskedLessEqualInt32x8", - argLen: 3, + name: "AbsoluteMaskedInt32x8", + argLen: 2, generic: true, }, { - name: "MaskedMaxInt32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedMinInt32x8", - argLen: 3, + name: "AddInt32x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulLowInt32x8", + name: "AddMaskedInt32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt32x8", - argLen: 3, + name: "AndInt32x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrInt32x8", + name: "AndMaskedInt32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPairDotProdAccumulateInt32x8", - argLen: 4, - generic: true, - }, - { - name: "MaskedPopCountInt32x8", + name: "AndNotInt32x8", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftInt32x8", + name: "AndNotMaskedInt32x8", argLen: 3, generic: true, }, { - name: "MaskedRotateRightInt32x8", - argLen: 3, - generic: true, + name: "EqualInt32x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedSaturatedPairDotProdAccumulateInt32x8", - argLen: 4, + name: "EqualMaskedInt32x8", + argLen: 3, + commutative: true, + generic: true, + }, + { + name: "GreaterInt32x8", + argLen: 2, generic: true, }, { - name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", - argLen: 4, + name: "GreaterEqualInt32x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftInt32x8", + name: "GreaterEqualMaskedInt32x8", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromInt32x8", - argLen: 4, + name: "GreaterMaskedInt32x8", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightInt32x8", - argLen: 3, + name: "LessInt32x8", + argLen: 2, generic: true, }, { - name: 
"MaskedShiftRightAndFillUpperFromInt32x8", - argLen: 4, + name: "LessEqualInt32x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedInt32x8", + name: "LessEqualMaskedInt32x8", argLen: 3, generic: true, }, { - name: "MaskedSubInt32x8", + name: "LessMaskedInt32x8", argLen: 3, generic: true, }, { - name: "MaskedUnsignedSignedQuadDotProdAccumulateInt32x8", - argLen: 4, - generic: true, + name: "MaxInt32x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedXorInt32x8", + name: "MaxMaskedInt32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaxInt32x8", + name: "MinInt32x8", argLen: 2, commutative: true, generic: true, }, { - name: "MinInt32x8", - argLen: 2, + name: "MinMaskedInt32x8", + argLen: 3, commutative: true, generic: true, }, @@ -61963,23 +61874,46 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MulLowMaskedInt32x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualInt32x8", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedInt32x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrInt32x8", argLen: 2, commutative: true, generic: true, }, + { + name: "OrMaskedInt32x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PairDotProdAccumulateInt32x8", argLen: 3, generic: true, }, + { + name: "PairDotProdAccumulateMaskedInt32x8", + argLen: 4, + generic: true, + }, { name: "PairwiseAddInt32x8", argLen: 2, @@ -61995,26 +61929,51 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedInt32x8", + argLen: 2, + generic: true, + }, { name: "RotateLeftInt32x8", argLen: 2, generic: true, }, + { + name: "RotateLeftMaskedInt32x8", + argLen: 3, + generic: true, + }, { name: "RotateRightInt32x8", argLen: 2, generic: true, }, + { + name: "RotateRightMaskedInt32x8", + argLen: 3, + generic: true, + }, { name: "SaturatedPairDotProdAccumulateInt32x8", argLen: 3, generic: true, }, + { + name: "SaturatedPairDotProdAccumulateMaskedInt32x8", + argLen: 4, + generic: true, + }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLen: 3, generic: true, }, + { + name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", + argLen: 4, + generic: true, + }, { name: "ShiftAllLeftInt32x8", argLen: 2, @@ -62041,260 +62000,223 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightInt32x8", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedInt32x8", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt32x8", + name: "ShiftLeftMaskedInt32x8", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedInt32x8", + name: "ShiftRightInt32x8", argLen: 2, generic: true, }, { - name: "SignInt32x8", - argLen: 2, + name: "ShiftRightAndFillUpperFromInt32x8", + argLen: 3, generic: true, }, { - name: "SubInt32x8", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedInt32x8", + argLen: 4, generic: true, }, { - name: "UnsignedSignedQuadDotProdAccumulateInt32x8", + name: "ShiftRightMaskedInt32x8", argLen: 3, generic: true, }, { - name: "XorInt32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AbsoluteInt64x2", - argLen: 1, - generic: true, - }, - { - name: "AddInt64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndInt64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndNotInt64x2", + name: "ShiftRightSignExtendedInt32x8", argLen: 2, generic: true, }, { - name: "EqualInt64x2", - argLen: 2, 
- commutative: true, - generic: true, - }, - { - name: "GreaterInt64x2", - argLen: 2, + name: "ShiftRightSignExtendedMaskedInt32x8", + argLen: 3, generic: true, }, { - name: "GreaterEqualInt64x2", + name: "SignInt32x8", argLen: 2, generic: true, }, { - name: "LessInt64x2", + name: "SubInt32x8", argLen: 2, generic: true, }, { - name: "LessEqualInt64x2", - argLen: 2, + name: "SubMaskedInt32x8", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt64x2", - argLen: 2, + name: "UnsignedSignedQuadDotProdAccumulateInt32x8", + argLen: 3, generic: true, }, { - name: "MaskedAddInt64x2", - argLen: 3, - commutative: true, - generic: true, + name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x8", + argLen: 4, + generic: true, }, { - name: "MaskedAndInt64x2", - argLen: 3, + name: "XorInt32x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAndNotInt64x2", - argLen: 3, - generic: true, - }, - { - name: "MaskedEqualInt64x2", + name: "XorMaskedInt32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterInt64x2", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt64x2", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessInt64x2", - argLen: 3, + name: "AbsoluteInt64x2", + argLen: 1, generic: true, }, { - name: "MaskedLessEqualInt64x2", - argLen: 3, + name: "AbsoluteMaskedInt64x2", + argLen: 2, generic: true, }, { - name: "MaskedMaxInt64x2", - argLen: 3, + name: "AddInt64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinInt64x2", + name: "AddMaskedInt64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulEvenWidenInt64x2", - argLen: 3, + name: "AndInt64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulLowInt64x2", + name: "AndMaskedInt64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt64x2", - argLen: 3, + name: "AndNotInt64x2", + argLen: 2, + generic: true, + }, + { + name: "AndNotMaskedInt64x2", + argLen: 3, + generic: true, + }, + { + name: "EqualInt64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrInt64x2", + name: "EqualMaskedInt64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountInt64x2", + name: "GreaterInt64x2", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftInt64x2", - argLen: 3, + name: "GreaterEqualInt64x2", + argLen: 2, generic: true, }, { - name: "MaskedRotateRightInt64x2", + name: "GreaterEqualMaskedInt64x2", argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftInt64x2", + name: "GreaterMaskedInt64x2", argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightInt64x2", - argLen: 3, + name: "LessInt64x2", + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightSignExtendedInt64x2", - argLen: 3, + name: "LessEqualInt64x2", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftInt64x2", + name: "LessEqualMaskedInt64x2", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromInt64x2", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightInt64x2", + name: "LessMaskedInt64x2", argLen: 3, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromInt64x2", - argLen: 4, - generic: true, + name: "MaxInt64x2", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedShiftRightSignExtendedInt64x2", - argLen: 3, - generic: true, + name: "MaxMaskedInt64x2", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedSubInt64x2", - argLen: 3, - generic: true, + name: "MinInt64x2", + argLen: 2, + 
commutative: true, + generic: true, }, { - name: "MaskedXorInt64x2", + name: "MinMaskedInt64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaxInt64x2", + name: "MulEvenWidenInt64x2", argLen: 2, commutative: true, generic: true, }, { - name: "MinInt64x2", - argLen: 2, + name: "MulEvenWidenMaskedInt64x2", + argLen: 3, commutative: true, generic: true, }, { - name: "MulEvenWidenInt64x2", + name: "MulLowInt64x2", argLen: 2, commutative: true, generic: true, }, { - name: "MulLowInt64x2", - argLen: 2, + name: "MulLowMaskedInt64x2", + argLen: 3, commutative: true, generic: true, }, @@ -62304,279 +62226,261 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "NotEqualMaskedInt64x2", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrInt64x2", argLen: 2, commutative: true, generic: true, }, + { + name: "OrMaskedInt64x2", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PopCountInt64x2", argLen: 1, generic: true, }, { - name: "RotateLeftInt64x2", + name: "PopCountMaskedInt64x2", argLen: 2, generic: true, }, { - name: "RotateRightInt64x2", + name: "RotateLeftInt64x2", argLen: 2, generic: true, }, { - name: "ShiftAllLeftInt64x2", - argLen: 2, + name: "RotateLeftMaskedInt64x2", + argLen: 3, generic: true, }, { - name: "ShiftAllRightInt64x2", + name: "RotateRightInt64x2", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt64x2", - argLen: 2, + name: "RotateRightMaskedInt64x2", + argLen: 3, generic: true, }, { - name: "ShiftLeftInt64x2", + name: "ShiftAllLeftInt64x2", argLen: 2, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt64x2", + name: "ShiftAllLeftMaskedInt64x2", argLen: 3, generic: true, }, { - name: "ShiftRightInt64x2", + name: "ShiftAllRightInt64x2", argLen: 2, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt64x2", + name: "ShiftAllRightMaskedInt64x2", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "SubInt64x2", + name: "ShiftAllRightSignExtendedInt64x2", argLen: 2, generic: true, }, { - name: "XorInt64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AbsoluteInt64x4", - argLen: 1, + name: "ShiftAllRightSignExtendedMaskedInt64x2", + argLen: 3, generic: true, }, { - name: "AddInt64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndInt64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndNotInt64x4", + name: "ShiftLeftInt64x2", argLen: 2, generic: true, }, { - name: "EqualInt64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "GreaterInt64x4", - argLen: 2, + name: "ShiftLeftAndFillUpperFromInt64x2", + argLen: 3, generic: true, }, { - name: "GreaterEqualInt64x4", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedInt64x2", + argLen: 4, generic: true, }, { - name: "LessInt64x4", - argLen: 2, + name: "ShiftLeftMaskedInt64x2", + argLen: 3, generic: true, }, { - name: "LessEqualInt64x4", + name: "ShiftRightInt64x2", argLen: 2, generic: true, }, { - name: "MaskedAbsoluteInt64x4", - argLen: 2, + name: "ShiftRightAndFillUpperFromInt64x2", + argLen: 3, generic: true, }, { - name: "MaskedAddInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedAndInt64x4", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromMaskedInt64x2", + argLen: 4, + generic: true, }, { - name: "MaskedAndNotInt64x4", + name: "ShiftRightMaskedInt64x2", argLen: 3, generic: true, }, { - name: 
"MaskedEqualInt64x4", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedGreaterInt64x4", - argLen: 3, + name: "ShiftRightSignExtendedInt64x2", + argLen: 2, generic: true, }, { - name: "MaskedGreaterEqualInt64x4", + name: "ShiftRightSignExtendedMaskedInt64x2", argLen: 3, generic: true, }, { - name: "MaskedLessInt64x4", - argLen: 3, + name: "SubInt64x2", + argLen: 2, generic: true, }, { - name: "MaskedLessEqualInt64x4", + name: "SubMaskedInt64x2", argLen: 3, generic: true, }, { - name: "MaskedMaxInt64x4", - argLen: 3, + name: "XorInt64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinInt64x4", + name: "XorMaskedInt64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulEvenWidenInt64x4", - argLen: 3, + name: "AbsoluteInt64x4", + argLen: 1, + generic: true, + }, + { + name: "AbsoluteMaskedInt64x4", + argLen: 2, + generic: true, + }, + { + name: "AddInt64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulLowInt64x4", + name: "AddMaskedInt64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt64x4", - argLen: 3, + name: "AndInt64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrInt64x4", + name: "AndMaskedInt64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountInt64x4", + name: "AndNotInt64x4", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftInt64x4", + name: "AndNotMaskedInt64x4", argLen: 3, generic: true, }, { - name: "MaskedRotateRightInt64x4", - argLen: 3, - generic: true, + name: "EqualInt64x4", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedShiftAllLeftInt64x4", - argLen: 3, - generic: true, + name: "EqualMaskedInt64x4", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedShiftAllRightInt64x4", - argLen: 3, + name: "GreaterInt64x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightSignExtendedInt64x4", - argLen: 3, + name: "GreaterEqualInt64x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftInt64x4", + name: "GreaterEqualMaskedInt64x4", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromInt64x4", - argLen: 4, + name: "GreaterMaskedInt64x4", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightInt64x4", - argLen: 3, + name: "LessInt64x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromInt64x4", - argLen: 4, + name: "LessEqualInt64x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedInt64x4", + name: "LessEqualMaskedInt64x4", argLen: 3, generic: true, }, { - name: "MaskedSubInt64x4", + name: "LessMaskedInt64x4", argLen: 3, generic: true, }, { - name: "MaskedXorInt64x4", - argLen: 3, + name: "MaxInt64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaxInt64x4", - argLen: 2, + name: "MaxMaskedInt64x4", + argLen: 3, commutative: true, generic: true, }, @@ -62586,315 +62490,333 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MinMaskedInt64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulEvenWidenInt64x4", argLen: 2, commutative: true, generic: true, }, + { + name: "MulEvenWidenMaskedInt64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulLowInt64x4", argLen: 2, commutative: true, generic: true, }, + { + name: "MulLowMaskedInt64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualInt64x4", argLen: 2, commutative: true, generic: true, }, + { + name: 
"NotEqualMaskedInt64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrInt64x4", argLen: 2, commutative: true, generic: true, }, + { + name: "OrMaskedInt64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PopCountInt64x4", argLen: 1, generic: true, }, { - name: "RotateLeftInt64x4", + name: "PopCountMaskedInt64x4", argLen: 2, generic: true, }, { - name: "RotateRightInt64x4", + name: "RotateLeftInt64x4", argLen: 2, generic: true, }, { - name: "ShiftAllLeftInt64x4", - argLen: 2, + name: "RotateLeftMaskedInt64x4", + argLen: 3, generic: true, }, { - name: "ShiftAllRightInt64x4", + name: "RotateRightInt64x4", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt64x4", - argLen: 2, + name: "RotateRightMaskedInt64x4", + argLen: 3, generic: true, }, { - name: "ShiftLeftInt64x4", + name: "ShiftAllLeftInt64x4", argLen: 2, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt64x4", + name: "ShiftAllLeftMaskedInt64x4", argLen: 3, generic: true, }, { - name: "ShiftRightInt64x4", + name: "ShiftAllRightInt64x4", argLen: 2, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt64x4", + name: "ShiftAllRightMaskedInt64x4", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedInt64x4", + name: "ShiftAllRightSignExtendedInt64x4", argLen: 2, generic: true, }, { - name: "SubInt64x4", - argLen: 2, + name: "ShiftAllRightSignExtendedMaskedInt64x4", + argLen: 3, generic: true, }, { - name: "XorInt64x4", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftLeftInt64x4", + argLen: 2, + generic: true, }, { - name: "AbsoluteInt64x8", - argLen: 1, + name: "ShiftLeftAndFillUpperFromInt64x4", + argLen: 3, generic: true, }, { - name: "AddInt64x8", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftLeftAndFillUpperFromMaskedInt64x4", + argLen: 4, + generic: true, }, { - name: "AndInt64x8", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftLeftMaskedInt64x4", + argLen: 3, + generic: true, }, { - name: "AndNotInt64x8", + name: "ShiftRightInt64x4", argLen: 2, generic: true, }, { - name: "EqualInt64x8", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromInt64x4", + argLen: 3, + generic: true, }, { - name: "GreaterInt64x8", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedInt64x4", + argLen: 4, generic: true, }, { - name: "GreaterEqualInt64x8", - argLen: 2, + name: "ShiftRightMaskedInt64x4", + argLen: 3, generic: true, }, { - name: "LessInt64x8", + name: "ShiftRightSignExtendedInt64x4", argLen: 2, generic: true, }, { - name: "LessEqualInt64x8", - argLen: 2, + name: "ShiftRightSignExtendedMaskedInt64x4", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt64x8", + name: "SubInt64x4", argLen: 2, generic: true, }, { - name: "MaskedAddInt64x8", - argLen: 3, - commutative: true, - generic: true, + name: "SubMaskedInt64x4", + argLen: 3, + generic: true, }, { - name: "MaskedAndInt64x8", - argLen: 3, + name: "XorInt64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAndNotInt64x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedEqualInt64x8", + name: "XorMaskedInt64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterInt64x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt64x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessInt64x8", - argLen: 3, + name: "AbsoluteInt64x8", + argLen: 1, generic: true, }, { - name: "MaskedLessEqualInt64x8", - argLen: 3, + name: "AbsoluteMaskedInt64x8", + argLen: 
2, generic: true, }, { - name: "MaskedMaxInt64x8", - argLen: 3, + name: "AddInt64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinInt64x8", + name: "AddMaskedInt64x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulEvenWidenInt64x8", - argLen: 3, + name: "AndInt64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulLowInt64x8", + name: "AndMaskedInt64x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt64x8", - argLen: 3, + name: "AndNotInt64x8", + argLen: 2, + generic: true, + }, + { + name: "AndNotMaskedInt64x8", + argLen: 3, + generic: true, + }, + { + name: "EqualInt64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrInt64x8", + name: "EqualMaskedInt64x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountInt64x8", + name: "GreaterInt64x8", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftInt64x8", - argLen: 3, + name: "GreaterEqualInt64x8", + argLen: 2, generic: true, }, { - name: "MaskedRotateRightInt64x8", + name: "GreaterEqualMaskedInt64x8", argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftInt64x8", + name: "GreaterMaskedInt64x8", argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightInt64x8", - argLen: 3, + name: "LessInt64x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightSignExtendedInt64x8", - argLen: 3, + name: "LessEqualInt64x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftInt64x8", + name: "LessEqualMaskedInt64x8", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromInt64x8", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightInt64x8", + name: "LessMaskedInt64x8", argLen: 3, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromInt64x8", - argLen: 4, - generic: true, + name: "MaxInt64x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedShiftRightSignExtendedInt64x8", - argLen: 3, - generic: true, + name: "MaxMaskedInt64x8", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedSubInt64x8", - argLen: 3, - generic: true, + name: "MinInt64x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedXorInt64x8", + name: "MinMaskedInt64x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaxInt64x8", + name: "MulEvenWidenInt64x8", argLen: 2, commutative: true, generic: true, }, { - name: "MinInt64x8", - argLen: 2, + name: "MulEvenWidenMaskedInt64x8", + argLen: 3, commutative: true, generic: true, }, { - name: "MulEvenWidenInt64x8", + name: "MulLowInt64x8", argLen: 2, commutative: true, generic: true, }, { - name: "MulLowInt64x8", - argLen: 2, + name: "MulLowMaskedInt64x8", + argLen: 3, commutative: true, generic: true, }, @@ -62904,42 +62826,84 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "NotEqualMaskedInt64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrInt64x8", argLen: 2, commutative: true, generic: true, }, + { + name: "OrMaskedInt64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PopCountInt64x8", argLen: 1, generic: true, }, + { + name: "PopCountMaskedInt64x8", + argLen: 2, + generic: true, + }, { name: "RotateLeftInt64x8", argLen: 2, generic: true, }, + { + name: "RotateLeftMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "RotateRightInt64x8", argLen: 2, generic: true, }, + { + name: "RotateRightMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllLeftInt64x8", argLen: 2, 
generic: true, }, + { + name: "ShiftAllLeftMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt64x8", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightSignExtendedInt64x8", argLen: 2, generic: true, }, + { + name: "ShiftAllRightSignExtendedMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "ShiftLeftInt64x8", argLen: 2, @@ -62950,6 +62914,16 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftLeftAndFillUpperFromMaskedInt64x8", + argLen: 4, + generic: true, + }, + { + name: "ShiftLeftMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "ShiftRightInt64x8", argLen: 2, @@ -62960,33 +62934,70 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftRightAndFillUpperFromMaskedInt64x8", + argLen: 4, + generic: true, + }, + { + name: "ShiftRightMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "ShiftRightSignExtendedInt64x8", argLen: 2, generic: true, }, + { + name: "ShiftRightSignExtendedMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "SubInt64x8", argLen: 2, generic: true, }, + { + name: "SubMaskedInt64x8", + argLen: 3, + generic: true, + }, { name: "XorInt64x8", argLen: 2, commutative: true, generic: true, }, + { + name: "XorMaskedInt64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AbsoluteInt8x16", argLen: 1, generic: true, }, + { + name: "AbsoluteMaskedInt8x16", + argLen: 2, + generic: true, + }, { name: "AddInt8x16", argLen: 2, commutative: true, generic: true, }, + { + name: "AddMaskedInt8x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AndInt8x16", argLen: 2, @@ -63004,6 +63015,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "EqualMaskedInt8x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "GreaterInt8x16", argLen: 2, @@ -63015,106 +63032,68 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "LessInt8x16", - argLen: 2, + name: "GreaterEqualMaskedInt8x16", + argLen: 3, generic: true, }, { - name: "LessEqualInt8x16", - argLen: 2, + name: "GreaterMaskedInt8x16", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt8x16", + name: "LessInt8x16", argLen: 2, generic: true, }, { - name: "MaskedAddInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedEqualInt8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedGreaterInt8x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt8x16", - argLen: 3, + name: "LessEqualInt8x16", + argLen: 2, generic: true, }, { - name: "MaskedLessInt8x16", + name: "LessEqualMaskedInt8x16", argLen: 3, generic: true, }, { - name: "MaskedLessEqualInt8x16", + name: "LessMaskedInt8x16", argLen: 3, generic: true, }, { - name: "MaskedMaxInt8x16", - argLen: 3, + name: "MaxInt8x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinInt8x16", + name: "MaxMaskedInt8x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt8x16", - argLen: 3, + name: "MinInt8x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedPopCountInt8x16", - argLen: 2, - generic: true, - }, - { - name: "MaskedSaturatedAddInt8x16", + name: "MinMaskedInt8x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedSaturatedSubInt8x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedSubInt8x16", - argLen: 3, - generic: true, - }, - { - name: 
"MaxInt8x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinInt8x16", + name: "NotEqualInt8x16", argLen: 2, commutative: true, generic: true, }, { - name: "NotEqualInt8x16", - argLen: 2, + name: "NotEqualMaskedInt8x16", + argLen: 3, commutative: true, generic: true, }, @@ -63129,17 +63108,33 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedInt8x16", + argLen: 2, + generic: true, + }, { name: "SaturatedAddInt8x16", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedInt8x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedSubInt8x16", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedInt8x16", + argLen: 3, + generic: true, + }, { name: "SignInt8x16", argLen: 2, @@ -63150,6 +63145,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "SubMaskedInt8x16", + argLen: 3, + generic: true, + }, { name: "XorInt8x16", argLen: 2, @@ -63161,12 +63161,23 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "AbsoluteMaskedInt8x32", + argLen: 2, + generic: true, + }, { name: "AddInt8x32", argLen: 2, commutative: true, generic: true, }, + { + name: "AddMaskedInt8x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AndInt8x32", argLen: 2, @@ -63184,6 +63195,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "EqualMaskedInt8x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "GreaterInt8x32", argLen: 2, @@ -63195,106 +63212,68 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "LessInt8x32", - argLen: 2, + name: "GreaterEqualMaskedInt8x32", + argLen: 3, generic: true, }, { - name: "LessEqualInt8x32", - argLen: 2, + name: "GreaterMaskedInt8x32", + argLen: 3, generic: true, }, { - name: "MaskedAbsoluteInt8x32", + name: "LessInt8x32", argLen: 2, generic: true, }, { - name: "MaskedAddInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedEqualInt8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedGreaterInt8x32", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualInt8x32", - argLen: 3, + name: "LessEqualInt8x32", + argLen: 2, generic: true, }, { - name: "MaskedLessInt8x32", + name: "LessEqualMaskedInt8x32", argLen: 3, generic: true, }, { - name: "MaskedLessEqualInt8x32", + name: "LessMaskedInt8x32", argLen: 3, generic: true, }, { - name: "MaskedMaxInt8x32", - argLen: 3, + name: "MaxInt8x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinInt8x32", + name: "MaxMaskedInt8x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualInt8x32", - argLen: 3, + name: "MinInt8x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedPopCountInt8x32", - argLen: 2, - generic: true, - }, - { - name: "MaskedSaturatedAddInt8x32", + name: "MinMaskedInt8x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedSaturatedSubInt8x32", - argLen: 3, - generic: true, - }, - { - name: "MaskedSubInt8x32", - argLen: 3, - generic: true, - }, - { - name: "MaxInt8x32", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinInt8x32", + name: "NotEqualInt8x32", argLen: 2, commutative: true, generic: true, }, { - name: "NotEqualInt8x32", - argLen: 2, + name: "NotEqualMaskedInt8x32", + argLen: 3, commutative: true, generic: true, }, @@ -63309,17 +63288,33 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: 
"PopCountMaskedInt8x32", + argLen: 2, + generic: true, + }, { name: "SaturatedAddInt8x32", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedInt8x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedSubInt8x32", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedInt8x32", + argLen: 3, + generic: true, + }, { name: "SignInt8x32", argLen: 2, @@ -63330,6 +63325,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "SubMaskedInt8x32", + argLen: 3, + generic: true, + }, { name: "XorInt8x32", argLen: 2, @@ -63341,6 +63341,11 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "AbsoluteMaskedInt8x64", + argLen: 2, + generic: true, + }, { name: "AddInt8x64", argLen: 2, @@ -63348,104 +63353,60 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "EqualInt8x64", - argLen: 2, + name: "AddMaskedInt8x64", + argLen: 3, commutative: true, generic: true, }, { - name: "GreaterInt8x64", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt8x64", - argLen: 2, - generic: true, - }, - { - name: "LessInt8x64", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt8x64", - argLen: 2, - generic: true, - }, - { - name: "MaskedAbsoluteInt8x64", - argLen: 2, - generic: true, - }, - { - name: "MaskedAddInt8x64", - argLen: 3, + name: "EqualInt8x64", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedEqualInt8x64", + name: "EqualMaskedInt8x64", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterInt8x64", - argLen: 3, + name: "GreaterInt8x64", + argLen: 2, generic: true, }, { - name: "MaskedGreaterEqualInt8x64", - argLen: 3, + name: "GreaterEqualInt8x64", + argLen: 2, generic: true, }, { - name: "MaskedLessInt8x64", + name: "GreaterEqualMaskedInt8x64", argLen: 3, generic: true, }, { - name: "MaskedLessEqualInt8x64", + name: "GreaterMaskedInt8x64", argLen: 3, generic: true, }, { - name: "MaskedMaxInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedMinInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedNotEqualInt8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedPopCountInt8x64", + name: "LessInt8x64", argLen: 2, generic: true, }, { - name: "MaskedSaturatedAddInt8x64", - argLen: 3, - commutative: true, - generic: true, + name: "LessEqualInt8x64", + argLen: 2, + generic: true, }, { - name: "MaskedSaturatedSubInt8x64", + name: "LessEqualMaskedInt8x64", argLen: 3, generic: true, }, { - name: "MaskedSubInt8x64", + name: "LessMaskedInt8x64", argLen: 3, generic: true, }, @@ -63456,192 +63417,161 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "MinInt8x64", - argLen: 2, + name: "MaxMaskedInt8x64", + argLen: 3, commutative: true, generic: true, }, { - name: "NotEqualInt8x64", + name: "MinInt8x64", argLen: 2, commutative: true, generic: true, }, { - name: "PopCountInt8x64", - argLen: 1, - generic: true, - }, - { - name: "SaturatedAddInt8x64", - argLen: 2, + name: "MinMaskedInt8x64", + argLen: 3, commutative: true, generic: true, }, { - name: "SaturatedSubInt8x64", - argLen: 2, - generic: true, - }, - { - name: "SubInt8x64", - argLen: 2, - generic: true, - }, - { - name: "AddUint16x16", + name: "NotEqualInt8x64", argLen: 2, commutative: true, generic: true, }, { - name: "AndUint16x16", - argLen: 2, + name: "NotEqualMaskedInt8x64", + argLen: 3, commutative: true, generic: true, }, { - name: "AndNotUint16x16", + name: "PopCountInt8x64", + argLen: 1, + 
generic: true, + }, + { + name: "PopCountMaskedInt8x64", argLen: 2, generic: true, }, { - name: "AverageUint16x16", + name: "SaturatedAddInt8x64", argLen: 2, commutative: true, generic: true, }, { - name: "EqualUint16x16", - argLen: 2, + name: "SaturatedAddMaskedInt8x64", + argLen: 3, commutative: true, generic: true, }, { - name: "GreaterUint16x16", + name: "SaturatedSubInt8x64", argLen: 2, generic: true, }, { - name: "GreaterEqualUint16x16", - argLen: 2, + name: "SaturatedSubMaskedInt8x64", + argLen: 3, generic: true, }, { - name: "LessUint16x16", + name: "SubInt8x64", argLen: 2, generic: true, }, { - name: "LessEqualUint16x16", - argLen: 2, + name: "SubMaskedInt8x64", + argLen: 3, generic: true, }, { - name: "MaskedAddUint16x16", - argLen: 3, + name: "AddUint16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAverageUint16x16", + name: "AddMaskedUint16x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedEqualUint16x16", - argLen: 3, + name: "AndUint16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedGreaterUint16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualUint16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessUint16x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualUint16x16", - argLen: 3, + name: "AndNotUint16x16", + argLen: 2, generic: true, }, { - name: "MaskedMaxUint16x16", - argLen: 3, + name: "AverageUint16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinUint16x16", + name: "AverageMaskedUint16x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulHighUint16x16", - argLen: 3, + name: "EqualUint16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint16x16", + name: "EqualMaskedUint16x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint16x16", + name: "GreaterUint16x16", argLen: 2, generic: true, }, { - name: "MaskedSaturatedAddUint16x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedSaturatedSubUint16x16", - argLen: 3, + name: "GreaterEqualUint16x16", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftUint16x16", + name: "GreaterEqualMaskedUint16x16", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromUint16x16", - argLen: 4, + name: "GreaterMaskedUint16x16", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightUint16x16", - argLen: 3, + name: "LessUint16x16", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint16x16", - argLen: 4, + name: "LessEqualUint16x16", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedUint16x16", + name: "LessEqualMaskedUint16x16", argLen: 3, generic: true, }, { - name: "MaskedSubUint16x16", + name: "LessMaskedUint16x16", argLen: 3, generic: true, }, @@ -63651,24 +63581,48 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MaxMaskedUint16x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MinUint16x16", argLen: 2, commutative: true, generic: true, }, + { + name: "MinMaskedUint16x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulHighUint16x16", argLen: 2, commutative: true, generic: true, }, + { + name: "MulHighMaskedUint16x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualUint16x16", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedUint16x16", + argLen: 3, + commutative: true, + generic: true, + 
}, { name: "OrUint16x16", argLen: 2, @@ -63690,17 +63644,33 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedUint16x16", + argLen: 2, + generic: true, + }, { name: "SaturatedAddUint16x16", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedUint16x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedSubUint16x16", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedUint16x16", + argLen: 3, + generic: true, + }, { name: "ShiftAllLeftUint16x16", argLen: 2, @@ -63722,174 +63692,134 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightUint16x16", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedUint16x16", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint16x16", + name: "ShiftLeftMaskedUint16x16", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedUint16x16", + name: "ShiftRightUint16x16", argLen: 2, generic: true, }, { - name: "SubUint16x16", - argLen: 2, + name: "ShiftRightAndFillUpperFromUint16x16", + argLen: 3, generic: true, }, { - name: "XorUint16x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AddUint16x32", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AverageUint16x32", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromMaskedUint16x16", + argLen: 4, + generic: true, }, { - name: "EqualUint16x32", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightMaskedUint16x16", + argLen: 3, + generic: true, }, { - name: "GreaterUint16x32", + name: "ShiftRightSignExtendedUint16x16", argLen: 2, generic: true, }, { - name: "GreaterEqualUint16x32", - argLen: 2, + name: "ShiftRightSignExtendedMaskedUint16x16", + argLen: 3, generic: true, }, { - name: "LessUint16x32", + name: "SubUint16x16", argLen: 2, generic: true, }, { - name: "LessEqualUint16x32", - argLen: 2, + name: "SubMaskedUint16x16", + argLen: 3, generic: true, }, { - name: "MaskedAddUint16x32", - argLen: 3, + name: "XorUint16x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAverageUint16x32", - argLen: 3, + name: "AddUint16x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedEqualUint16x32", + name: "AddMaskedUint16x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterUint16x32", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualUint16x32", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessUint16x32", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualUint16x32", - argLen: 3, - generic: true, - }, - { - name: "MaskedMaxUint16x32", - argLen: 3, + name: "AverageUint16x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinUint16x32", + name: "AverageMaskedUint16x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulHighUint16x32", - argLen: 3, + name: "EqualUint16x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint16x32", + name: "EqualMaskedUint16x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint16x32", + name: "GreaterUint16x32", argLen: 2, generic: true, }, { - name: "MaskedSaturatedAddUint16x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedSaturatedSubUint16x32", - argLen: 3, + name: "GreaterEqualUint16x32", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftUint16x32", + name: "GreaterEqualMaskedUint16x32", argLen: 3, generic: true, }, { - 
name: "MaskedShiftLeftAndFillUpperFromUint16x32", - argLen: 4, + name: "GreaterMaskedUint16x32", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightUint16x32", - argLen: 3, + name: "LessUint16x32", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint16x32", - argLen: 4, + name: "LessEqualUint16x32", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedUint16x32", + name: "LessEqualMaskedUint16x32", argLen: 3, generic: true, }, { - name: "MaskedSubUint16x32", + name: "LessMaskedUint16x32", argLen: 3, generic: true, }, @@ -63899,40 +63829,80 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MaxMaskedUint16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MinUint16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "MinMaskedUint16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulHighUint16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "MulHighMaskedUint16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualUint16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedUint16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PopCountUint16x32", argLen: 1, generic: true, }, + { + name: "PopCountMaskedUint16x32", + argLen: 2, + generic: true, + }, { name: "SaturatedAddUint16x32", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedUint16x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedSubUint16x32", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedUint16x32", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint16x32", argLen: 2, @@ -63944,179 +63914,139 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightUint16x32", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedUint16x32", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint16x32", + name: "ShiftLeftMaskedUint16x32", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedUint16x32", + name: "ShiftRightUint16x32", argLen: 2, generic: true, }, { - name: "SubUint16x32", - argLen: 2, + name: "ShiftRightAndFillUpperFromUint16x32", + argLen: 3, generic: true, }, { - name: "AddUint16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndUint16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndNotUint16x8", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedUint16x32", + argLen: 4, generic: true, }, { - name: "AverageUint16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "EqualUint16x8", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightMaskedUint16x32", + argLen: 3, + generic: true, }, { - name: "GreaterUint16x8", + name: "ShiftRightSignExtendedUint16x32", argLen: 2, generic: true, }, { - name: "GreaterEqualUint16x8", - argLen: 2, + name: "ShiftRightSignExtendedMaskedUint16x32", + argLen: 3, generic: true, }, { - name: "LessUint16x8", + name: "SubUint16x32", argLen: 2, generic: true, }, { - name: "LessEqualUint16x8", - argLen: 2, + name: "SubMaskedUint16x32", + argLen: 3, generic: true, }, { - name: "MaskedAddUint16x8", - argLen: 3, + name: "AddUint16x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAverageUint16x8", + name: "AddMaskedUint16x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedEqualUint16x8", - argLen: 3, + name: "AndUint16x8", + argLen: 2, 
commutative: true, generic: true, }, { - name: "MaskedGreaterUint16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedGreaterEqualUint16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessUint16x8", - argLen: 3, - generic: true, - }, - { - name: "MaskedLessEqualUint16x8", - argLen: 3, + name: "AndNotUint16x8", + argLen: 2, generic: true, }, { - name: "MaskedMaxUint16x8", - argLen: 3, + name: "AverageUint16x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinUint16x8", + name: "AverageMaskedUint16x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMulHighUint16x8", - argLen: 3, + name: "EqualUint16x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint16x8", + name: "EqualMaskedUint16x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint16x8", + name: "GreaterUint16x8", argLen: 2, generic: true, }, { - name: "MaskedSaturatedAddUint16x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedSaturatedSubUint16x8", - argLen: 3, + name: "GreaterEqualUint16x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftUint16x8", + name: "GreaterEqualMaskedUint16x8", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromUint16x8", - argLen: 4, + name: "GreaterMaskedUint16x8", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightUint16x8", - argLen: 3, + name: "LessUint16x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint16x8", - argLen: 4, + name: "LessEqualUint16x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedUint16x8", + name: "LessEqualMaskedUint16x8", argLen: 3, generic: true, }, { - name: "MaskedSubUint16x8", + name: "LessMaskedUint16x8", argLen: 3, generic: true, }, @@ -64126,24 +64056,48 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MaxMaskedUint16x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MinUint16x8", argLen: 2, commutative: true, generic: true, }, + { + name: "MinMaskedUint16x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulHighUint16x8", argLen: 2, commutative: true, generic: true, }, + { + name: "MulHighMaskedUint16x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualUint16x8", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedUint16x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrUint16x8", argLen: 2, @@ -64165,17 +64119,33 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedUint16x8", + argLen: 2, + generic: true, + }, { name: "SaturatedAddUint16x8", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedUint16x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedSubUint16x8", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedUint16x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllLeftUint16x8", argLen: 2, @@ -64197,211 +64167,168 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightUint16x8", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedUint16x8", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint16x8", + name: "ShiftLeftMaskedUint16x8", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedUint16x8", + name: "ShiftRightUint16x8", argLen: 2, generic: true, }, { - name: "SubUint16x8", - argLen: 2, + name: "ShiftRightAndFillUpperFromUint16x8", + argLen: 
3, generic: true, }, { - name: "XorUint16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AddUint32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndUint32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndNotUint32x16", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedUint16x8", + argLen: 4, generic: true, }, { - name: "EqualUint32x16", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightMaskedUint16x8", + argLen: 3, + generic: true, }, { - name: "GreaterUint32x16", + name: "ShiftRightSignExtendedUint16x8", argLen: 2, generic: true, }, { - name: "GreaterEqualUint32x16", - argLen: 2, + name: "ShiftRightSignExtendedMaskedUint16x8", + argLen: 3, generic: true, }, { - name: "LessUint32x16", + name: "SubUint16x8", argLen: 2, generic: true, }, { - name: "LessEqualUint32x16", - argLen: 2, + name: "SubMaskedUint16x8", + argLen: 3, generic: true, }, { - name: "MaskedAddUint32x16", - argLen: 3, + name: "XorUint16x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAndUint32x16", - argLen: 3, + name: "AddUint32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAndNotUint32x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedEqualUint32x16", + name: "AddMaskedUint32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterUint32x16", - argLen: 3, - generic: true, + name: "AndUint32x16", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedGreaterEqualUint32x16", - argLen: 3, - generic: true, + name: "AndMaskedUint32x16", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedLessUint32x16", - argLen: 3, + name: "AndNotUint32x16", + argLen: 2, generic: true, }, { - name: "MaskedLessEqualUint32x16", + name: "AndNotMaskedUint32x16", argLen: 3, generic: true, }, { - name: "MaskedMaxUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedMinUint32x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedNotEqualUint32x16", - argLen: 3, + name: "EqualUint32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrUint32x16", + name: "EqualMaskedUint32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint32x16", + name: "GreaterUint32x16", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftUint32x16", - argLen: 3, + name: "GreaterEqualUint32x16", + argLen: 2, generic: true, }, { - name: "MaskedRotateRightUint32x16", + name: "GreaterEqualMaskedUint32x16", argLen: 3, generic: true, }, { - name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftLeftUint32x16", + name: "GreaterMaskedUint32x16", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromUint32x16", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightUint32x16", - argLen: 3, + name: "LessUint32x16", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint32x16", - argLen: 4, + name: "LessEqualUint32x16", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedUint32x16", + name: "LessEqualMaskedUint32x16", argLen: 3, generic: true, }, { - name: "MaskedSubUint32x16", + name: "LessMaskedUint32x16", argLen: 3, generic: true, }, { - name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x16", - argLen: 4, - generic: true, + name: "MaxUint32x16", + argLen: 2, + commutative: true, + generic: true, }, { - name: 
"MaskedXorUint32x16", + name: "MaxMaskedUint32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaxUint32x16", + name: "MinUint32x16", argLen: 2, commutative: true, generic: true, }, { - name: "MinUint32x16", - argLen: 2, + name: "MinMaskedUint32x16", + argLen: 3, commutative: true, generic: true, }, @@ -64412,252 +64339,252 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "OrUint32x16", - argLen: 2, + name: "NotEqualMaskedUint32x16", + argLen: 3, commutative: true, generic: true, }, { - name: "PopCountUint32x16", - argLen: 1, - generic: true, - }, - { - name: "RotateLeftUint32x16", - argLen: 2, - generic: true, + name: "OrUint32x16", + argLen: 2, + commutative: true, + generic: true, }, { - name: "RotateRightUint32x16", - argLen: 2, - generic: true, + name: "OrMaskedUint32x16", + argLen: 3, + commutative: true, + generic: true, }, { - name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", - argLen: 3, + name: "PopCountUint32x16", + argLen: 1, generic: true, }, { - name: "ShiftLeftUint32x16", + name: "PopCountMaskedUint32x16", argLen: 2, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint32x16", - argLen: 3, - generic: true, - }, - { - name: "ShiftRightUint32x16", + name: "RotateLeftUint32x16", argLen: 2, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint32x16", + name: "RotateLeftMaskedUint32x16", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedUint32x16", - argLen: 2, - generic: true, - }, - { - name: "SubUint32x16", + name: "RotateRightUint32x16", argLen: 2, generic: true, }, { - name: "UnsignedSignedQuadDotProdAccumulateUint32x16", + name: "RotateRightMaskedUint32x16", argLen: 3, generic: true, }, { - name: "XorUint32x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AddUint32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndUint32x4", - argLen: 2, - commutative: true, - generic: true, + name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", + argLen: 3, + generic: true, }, { - name: "AndNotUint32x4", - argLen: 2, + name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16", + argLen: 4, generic: true, }, { - name: "EqualUint32x4", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftLeftUint32x16", + argLen: 2, + generic: true, }, { - name: "GreaterUint32x4", - argLen: 2, + name: "ShiftLeftAndFillUpperFromUint32x16", + argLen: 3, generic: true, }, { - name: "GreaterEqualUint32x4", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedUint32x16", + argLen: 4, generic: true, }, { - name: "LessUint32x4", - argLen: 2, + name: "ShiftLeftMaskedUint32x16", + argLen: 3, generic: true, }, { - name: "LessEqualUint32x4", + name: "ShiftRightUint32x16", argLen: 2, generic: true, }, { - name: "MaskedAddUint32x4", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromUint32x16", + argLen: 3, + generic: true, }, { - name: "MaskedAndUint32x4", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromMaskedUint32x16", + argLen: 4, + generic: true, }, { - name: "MaskedAndNotUint32x4", + name: "ShiftRightMaskedUint32x16", argLen: 3, generic: true, }, { - name: "MaskedEqualUint32x4", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightSignExtendedUint32x16", + argLen: 2, + generic: true, }, { - name: "MaskedGreaterUint32x4", + name: "ShiftRightSignExtendedMaskedUint32x16", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualUint32x4", - argLen: 3, + name: "SubUint32x16", + 
argLen: 2, generic: true, }, { - name: "MaskedLessUint32x4", + name: "SubMaskedUint32x16", argLen: 3, generic: true, }, { - name: "MaskedLessEqualUint32x4", + name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLen: 3, generic: true, }, { - name: "MaskedMaxUint32x4", - argLen: 3, + name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x16", + argLen: 4, + generic: true, + }, + { + name: "XorUint32x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinUint32x4", + name: "XorMaskedUint32x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint32x4", + name: "AddUint32x4", + argLen: 2, + commutative: true, + generic: true, + }, + { + name: "AddMaskedUint32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedOrUint32x4", + name: "AndUint32x4", + argLen: 2, + commutative: true, + generic: true, + }, + { + name: "AndMaskedUint32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint32x4", + name: "AndNotUint32x4", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftUint32x4", + name: "AndNotMaskedUint32x4", argLen: 3, generic: true, }, { - name: "MaskedRotateRightUint32x4", - argLen: 3, + name: "EqualUint32x4", + argLen: 2, + commutative: true, + generic: true, + }, + { + name: "EqualMaskedUint32x4", + argLen: 3, + commutative: true, + generic: true, + }, + { + name: "GreaterUint32x4", + argLen: 2, generic: true, }, { - name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", - argLen: 4, + name: "GreaterEqualUint32x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftUint32x4", + name: "GreaterEqualMaskedUint32x4", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromUint32x4", - argLen: 4, + name: "GreaterMaskedUint32x4", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightUint32x4", - argLen: 3, + name: "LessUint32x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint32x4", - argLen: 4, + name: "LessEqualUint32x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedUint32x4", + name: "LessEqualMaskedUint32x4", argLen: 3, generic: true, }, { - name: "MaskedSubUint32x4", + name: "LessMaskedUint32x4", argLen: 3, generic: true, }, { - name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x4", - argLen: 4, - generic: true, + name: "MaxUint32x4", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedXorUint32x4", + name: "MaxMaskedUint32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaxUint32x4", + name: "MinUint32x4", argLen: 2, commutative: true, generic: true, }, { - name: "MinUint32x4", - argLen: 2, + name: "MinMaskedUint32x4", + argLen: 3, commutative: true, generic: true, }, @@ -64673,12 +64600,24 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "NotEqualMaskedUint32x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrUint32x4", argLen: 2, commutative: true, generic: true, }, + { + name: "OrMaskedUint32x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PairwiseAddUint32x4", argLen: 2, @@ -64694,21 +64633,41 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedUint32x4", + argLen: 2, + generic: true, + }, { name: "RotateLeftUint32x4", argLen: 2, generic: true, }, + { + name: "RotateLeftMaskedUint32x4", + argLen: 3, + generic: true, + }, { name: "RotateRightUint32x4", argLen: 2, generic: true, }, + { + name: "RotateRightMaskedUint32x4", + argLen: 3, + generic: true, 
+ }, { name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLen: 3, generic: true, }, + { + name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4", + argLen: 4, + generic: true, + }, { name: "ShiftAllLeftUint32x4", argLen: 2, @@ -64730,216 +64689,184 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightUint32x4", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedUint32x4", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint32x4", + name: "ShiftLeftMaskedUint32x4", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "SubUint32x4", + name: "ShiftRightUint32x4", argLen: 2, generic: true, }, { - name: "UnsignedSignedQuadDotProdAccumulateUint32x4", + name: "ShiftRightAndFillUpperFromUint32x4", argLen: 3, generic: true, }, { - name: "XorUint32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AddUint32x8", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromMaskedUint32x4", + argLen: 4, + generic: true, }, { - name: "AndUint32x8", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightMaskedUint32x4", + argLen: 3, + generic: true, }, { - name: "AndNotUint32x8", + name: "ShiftRightSignExtendedUint32x4", argLen: 2, generic: true, }, { - name: "EqualUint32x8", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftRightSignExtendedMaskedUint32x4", + argLen: 3, + generic: true, }, { - name: "GreaterUint32x8", + name: "SubUint32x4", argLen: 2, generic: true, }, { - name: "GreaterEqualUint32x8", - argLen: 2, + name: "SubMaskedUint32x4", + argLen: 3, generic: true, }, { - name: "LessUint32x8", - argLen: 2, + name: "UnsignedSignedQuadDotProdAccumulateUint32x4", + argLen: 3, generic: true, }, { - name: "LessEqualUint32x8", - argLen: 2, + name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x4", + argLen: 4, generic: true, }, { - name: "MaskedAddUint32x8", - argLen: 3, + name: "XorUint32x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedAndUint32x8", + name: "XorMaskedUint32x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedAndNotUint32x8", - argLen: 3, - generic: true, + name: "AddUint32x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedEqualUint32x8", + name: "AddMaskedUint32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedGreaterUint32x8", - argLen: 3, - generic: true, + name: "AndUint32x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedGreaterEqualUint32x8", - argLen: 3, - generic: true, + name: "AndMaskedUint32x8", + argLen: 3, + commutative: true, + generic: true, }, { - name: "MaskedLessUint32x8", - argLen: 3, + name: "AndNotUint32x8", + argLen: 2, generic: true, }, { - name: "MaskedLessEqualUint32x8", + name: "AndNotMaskedUint32x8", argLen: 3, generic: true, }, { - name: "MaskedMaxUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedMinUint32x8", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedNotEqualUint32x8", - argLen: 3, + name: "EqualUint32x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrUint32x8", + name: "EqualMaskedUint32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint32x8", + name: "GreaterUint32x8", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftUint32x8", - argLen: 3, + name: "GreaterEqualUint32x8", + argLen: 2, generic: true, }, { - 
name: "MaskedRotateRightUint32x8", + name: "GreaterEqualMaskedUint32x8", argLen: 3, generic: true, }, { - name: "MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftLeftUint32x8", + name: "GreaterMaskedUint32x8", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromUint32x8", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightUint32x8", - argLen: 3, + name: "LessUint32x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint32x8", - argLen: 4, + name: "LessEqualUint32x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedUint32x8", + name: "LessEqualMaskedUint32x8", argLen: 3, generic: true, }, { - name: "MaskedSubUint32x8", + name: "LessMaskedUint32x8", argLen: 3, generic: true, }, { - name: "MaskedUnsignedSignedQuadDotProdAccumulateUint32x8", - argLen: 4, - generic: true, + name: "MaxUint32x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedXorUint32x8", + name: "MaxMaskedUint32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaxUint32x8", + name: "MinUint32x8", argLen: 2, commutative: true, generic: true, }, { - name: "MinUint32x8", - argLen: 2, + name: "MinMaskedUint32x8", + argLen: 3, commutative: true, generic: true, }, @@ -64955,12 +64882,24 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "NotEqualMaskedUint32x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrUint32x8", argLen: 2, commutative: true, generic: true, }, + { + name: "OrMaskedUint32x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PairwiseAddUint32x8", argLen: 2, @@ -64977,251 +64916,227 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "RotateLeftUint32x8", + name: "PopCountMaskedUint32x8", argLen: 2, generic: true, }, { - name: "RotateRightUint32x8", + name: "RotateLeftUint32x8", argLen: 2, generic: true, }, { - name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", + name: "RotateLeftMaskedUint32x8", argLen: 3, generic: true, }, { - name: "ShiftAllLeftUint32x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftAllRightUint32x8", + name: "RotateRightUint32x8", argLen: 2, generic: true, }, { - name: "ShiftLeftUint32x8", - argLen: 2, + name: "RotateRightMaskedUint32x8", + argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint32x8", + name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLen: 3, generic: true, }, { - name: "ShiftRightUint32x8", - argLen: 2, + name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8", + argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint32x8", - argLen: 3, + name: "ShiftAllLeftUint32x8", + argLen: 2, generic: true, }, { - name: "ShiftRightSignExtendedUint32x8", + name: "ShiftAllRightUint32x8", argLen: 2, generic: true, }, { - name: "SubUint32x8", + name: "ShiftLeftUint32x8", argLen: 2, generic: true, }, { - name: "UnsignedSignedQuadDotProdAccumulateUint32x8", + name: "ShiftLeftAndFillUpperFromUint32x8", argLen: 3, generic: true, }, { - name: "XorUint32x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AddUint64x2", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftLeftAndFillUpperFromMaskedUint32x8", + argLen: 4, + generic: true, }, { - name: "AndUint64x2", - argLen: 2, - commutative: true, - generic: true, + name: "ShiftLeftMaskedUint32x8", + argLen: 3, + generic: true, }, { - name: "AndNotUint64x2", + name: 
"ShiftRightUint32x8", argLen: 2, generic: true, }, { - name: "EqualUint64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "GreaterUint64x2", - argLen: 2, + name: "ShiftRightAndFillUpperFromUint32x8", + argLen: 3, generic: true, }, { - name: "GreaterEqualUint64x2", - argLen: 2, + name: "ShiftRightAndFillUpperFromMaskedUint32x8", + argLen: 4, generic: true, }, { - name: "LessUint64x2", - argLen: 2, + name: "ShiftRightMaskedUint32x8", + argLen: 3, generic: true, }, { - name: "LessEqualUint64x2", + name: "ShiftRightSignExtendedUint32x8", argLen: 2, generic: true, }, { - name: "MaskedAddUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedAndUint64x2", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedAndNotUint64x2", + name: "ShiftRightSignExtendedMaskedUint32x8", argLen: 3, generic: true, }, { - name: "MaskedEqualUint64x2", - argLen: 3, - commutative: true, - generic: true, + name: "SubUint32x8", + argLen: 2, + generic: true, }, { - name: "MaskedGreaterUint64x2", + name: "SubMaskedUint32x8", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualUint64x2", + name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLen: 3, generic: true, }, { - name: "MaskedLessUint64x2", - argLen: 3, + name: "UnsignedSignedQuadDotProdAccumulateMaskedUint32x8", + argLen: 4, generic: true, }, { - name: "MaskedLessEqualUint64x2", - argLen: 3, - generic: true, + name: "XorUint32x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedMaxUint64x2", + name: "XorMaskedUint32x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinUint64x2", - argLen: 3, + name: "AddUint64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulEvenWidenUint64x2", + name: "AddMaskedUint64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint64x2", - argLen: 3, + name: "AndUint64x2", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrUint64x2", + name: "AndMaskedUint64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint64x2", + name: "AndNotUint64x2", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftUint64x2", + name: "AndNotMaskedUint64x2", argLen: 3, generic: true, }, { - name: "MaskedRotateRightUint64x2", - argLen: 3, - generic: true, + name: "EqualUint64x2", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedShiftAllLeftUint64x2", - argLen: 3, + name: "EqualMaskedUint64x2", + argLen: 3, + commutative: true, + generic: true, + }, + { + name: "GreaterUint64x2", + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightUint64x2", - argLen: 3, + name: "GreaterEqualUint64x2", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftUint64x2", + name: "GreaterEqualMaskedUint64x2", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromUint64x2", - argLen: 4, + name: "GreaterMaskedUint64x2", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightUint64x2", - argLen: 3, + name: "LessUint64x2", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint64x2", - argLen: 4, + name: "LessEqualUint64x2", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedUint64x2", + name: "LessEqualMaskedUint64x2", argLen: 3, generic: true, }, { - name: "MaskedSubUint64x2", + name: "LessMaskedUint64x2", argLen: 3, generic: true, }, { - name: "MaskedXorUint64x2", - argLen: 3, + name: "MaxUint64x2", + argLen: 2, commutative: true, generic: true, }, { - name: 
"MaxUint64x2", - argLen: 2, + name: "MaxMaskedUint64x2", + argLen: 3, commutative: true, generic: true, }, @@ -65231,18 +65146,36 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MinMaskedUint64x2", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulEvenWidenUint64x2", argLen: 2, commutative: true, generic: true, }, + { + name: "MulEvenWidenMaskedUint64x2", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualUint64x2", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedUint64x2", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrUint64x2", argLen: 2, @@ -65250,252 +65183,240 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PopCountUint64x2", - argLen: 1, - generic: true, + name: "OrMaskedUint64x2", + argLen: 3, + commutative: true, + generic: true, }, { - name: "RotateLeftUint64x2", - argLen: 2, + name: "PopCountUint64x2", + argLen: 1, generic: true, }, { - name: "RotateRightUint64x2", + name: "PopCountMaskedUint64x2", argLen: 2, generic: true, }, { - name: "ShiftAllLeftUint64x2", + name: "RotateLeftUint64x2", argLen: 2, generic: true, }, { - name: "ShiftAllRightUint64x2", - argLen: 2, + name: "RotateLeftMaskedUint64x2", + argLen: 3, generic: true, }, { - name: "ShiftLeftUint64x2", + name: "RotateRightUint64x2", argLen: 2, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint64x2", + name: "RotateRightMaskedUint64x2", argLen: 3, generic: true, }, { - name: "ShiftRightUint64x2", + name: "ShiftAllLeftUint64x2", argLen: 2, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint64x2", + name: "ShiftAllLeftMaskedUint64x2", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedUint64x2", + name: "ShiftAllRightUint64x2", argLen: 2, generic: true, }, { - name: "SubUint64x2", - argLen: 2, + name: "ShiftAllRightMaskedUint64x2", + argLen: 3, generic: true, }, { - name: "XorUint64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AddUint64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndUint64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndNotUint64x4", + name: "ShiftLeftUint64x2", argLen: 2, generic: true, }, { - name: "EqualUint64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "GreaterUint64x4", - argLen: 2, + name: "ShiftLeftAndFillUpperFromUint64x2", + argLen: 3, generic: true, }, { - name: "GreaterEqualUint64x4", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedUint64x2", + argLen: 4, generic: true, }, { - name: "LessUint64x4", - argLen: 2, + name: "ShiftLeftMaskedUint64x2", + argLen: 3, generic: true, }, { - name: "LessEqualUint64x4", + name: "ShiftRightUint64x2", argLen: 2, generic: true, }, { - name: "MaskedAddUint64x4", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromUint64x2", + argLen: 3, + generic: true, }, { - name: "MaskedAndUint64x4", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromMaskedUint64x2", + argLen: 4, + generic: true, }, { - name: "MaskedAndNotUint64x4", + name: "ShiftRightMaskedUint64x2", argLen: 3, generic: true, }, { - name: "MaskedEqualUint64x4", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightSignExtendedUint64x2", + argLen: 2, + generic: true, }, { - name: "MaskedGreaterUint64x4", + name: "ShiftRightSignExtendedMaskedUint64x2", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualUint64x4", - argLen: 3, + name: 
"SubUint64x2", + argLen: 2, generic: true, }, { - name: "MaskedLessUint64x4", + name: "SubMaskedUint64x2", argLen: 3, generic: true, }, { - name: "MaskedLessEqualUint64x4", - argLen: 3, - generic: true, + name: "XorUint64x2", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedMaxUint64x4", + name: "XorMaskedUint64x2", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinUint64x4", - argLen: 3, + name: "AddUint64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulEvenWidenUint64x4", + name: "AddMaskedUint64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint64x4", - argLen: 3, + name: "AndUint64x4", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrUint64x4", + name: "AndMaskedUint64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint64x4", + name: "AndNotUint64x4", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftUint64x4", + name: "AndNotMaskedUint64x4", argLen: 3, generic: true, }, { - name: "MaskedRotateRightUint64x4", - argLen: 3, + name: "EqualUint64x4", + argLen: 2, + commutative: true, + generic: true, + }, + { + name: "EqualMaskedUint64x4", + argLen: 3, + commutative: true, + generic: true, + }, + { + name: "GreaterUint64x4", + argLen: 2, + generic: true, + }, + { + name: "GreaterEqualUint64x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftAllLeftUint64x4", + name: "GreaterEqualMaskedUint64x4", argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightUint64x4", + name: "GreaterMaskedUint64x4", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftUint64x4", - argLen: 3, + name: "LessUint64x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromUint64x4", - argLen: 4, + name: "LessEqualUint64x4", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightUint64x4", + name: "LessEqualMaskedUint64x4", argLen: 3, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint64x4", - argLen: 4, - generic: true, - }, - { - name: "MaskedShiftRightSignExtendedUint64x4", + name: "LessMaskedUint64x4", argLen: 3, generic: true, }, { - name: "MaskedSubUint64x4", - argLen: 3, - generic: true, + name: "MaxUint64x4", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedXorUint64x4", + name: "MaxMaskedUint64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaxUint64x4", + name: "MinUint64x4", argLen: 2, commutative: true, generic: true, }, { - name: "MinUint64x4", - argLen: 2, + name: "MinMaskedUint64x4", + argLen: 3, commutative: true, generic: true, }, @@ -65505,12 +65426,24 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MulEvenWidenMaskedUint64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualUint64x4", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedUint64x4", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrUint64x4", argLen: 2, @@ -65518,246 +65451,228 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "PopCountUint64x4", - argLen: 1, - generic: true, + name: "OrMaskedUint64x4", + argLen: 3, + commutative: true, + generic: true, }, { - name: "RotateLeftUint64x4", - argLen: 2, + name: "PopCountUint64x4", + argLen: 1, generic: true, }, { - name: "RotateRightUint64x4", + name: "PopCountMaskedUint64x4", argLen: 2, generic: true, }, { - name: "ShiftAllLeftUint64x4", + name: "RotateLeftUint64x4", argLen: 2, generic: true, }, { - name: "ShiftAllRightUint64x4", - argLen: 2, 
+ name: "RotateLeftMaskedUint64x4", + argLen: 3, generic: true, }, { - name: "ShiftLeftUint64x4", + name: "RotateRightUint64x4", argLen: 2, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint64x4", + name: "RotateRightMaskedUint64x4", argLen: 3, generic: true, }, { - name: "ShiftRightUint64x4", + name: "ShiftAllLeftUint64x4", argLen: 2, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint64x4", + name: "ShiftAllLeftMaskedUint64x4", argLen: 3, generic: true, }, { - name: "ShiftRightSignExtendedUint64x4", + name: "ShiftAllRightUint64x4", argLen: 2, generic: true, }, { - name: "SubUint64x4", - argLen: 2, + name: "ShiftAllRightMaskedUint64x4", + argLen: 3, generic: true, }, { - name: "XorUint64x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AddUint64x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndUint64x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "AndNotUint64x8", + name: "ShiftLeftUint64x4", argLen: 2, generic: true, }, { - name: "EqualUint64x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "GreaterUint64x8", - argLen: 2, + name: "ShiftLeftAndFillUpperFromUint64x4", + argLen: 3, generic: true, }, { - name: "GreaterEqualUint64x8", - argLen: 2, + name: "ShiftLeftAndFillUpperFromMaskedUint64x4", + argLen: 4, generic: true, }, { - name: "LessUint64x8", - argLen: 2, + name: "ShiftLeftMaskedUint64x4", + argLen: 3, generic: true, }, { - name: "LessEqualUint64x8", + name: "ShiftRightUint64x4", argLen: 2, generic: true, }, { - name: "MaskedAddUint64x8", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromUint64x4", + argLen: 3, + generic: true, }, { - name: "MaskedAndUint64x8", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightAndFillUpperFromMaskedUint64x4", + argLen: 4, + generic: true, }, { - name: "MaskedAndNotUint64x8", + name: "ShiftRightMaskedUint64x4", argLen: 3, generic: true, }, { - name: "MaskedEqualUint64x8", - argLen: 3, - commutative: true, - generic: true, + name: "ShiftRightSignExtendedUint64x4", + argLen: 2, + generic: true, }, { - name: "MaskedGreaterUint64x8", + name: "ShiftRightSignExtendedMaskedUint64x4", argLen: 3, generic: true, }, { - name: "MaskedGreaterEqualUint64x8", - argLen: 3, + name: "SubUint64x4", + argLen: 2, generic: true, }, { - name: "MaskedLessUint64x8", + name: "SubMaskedUint64x4", argLen: 3, generic: true, }, { - name: "MaskedLessEqualUint64x8", - argLen: 3, - generic: true, + name: "XorUint64x4", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedMaxUint64x8", + name: "XorMaskedUint64x4", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedMinUint64x8", - argLen: 3, + name: "AddUint64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMulEvenWidenUint64x8", + name: "AddMaskedUint64x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint64x8", - argLen: 3, + name: "AndUint64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedOrUint64x8", + name: "AndMaskedUint64x8", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedPopCountUint64x8", + name: "AndNotUint64x8", argLen: 2, generic: true, }, { - name: "MaskedRotateLeftUint64x8", + name: "AndNotMaskedUint64x8", argLen: 3, generic: true, }, { - name: "MaskedRotateRightUint64x8", - argLen: 3, - generic: true, + name: "EqualUint64x8", + argLen: 2, + commutative: true, + generic: true, }, { - name: "MaskedShiftAllLeftUint64x8", - argLen: 
3, + name: "EqualMaskedUint64x8", + argLen: 3, + commutative: true, + generic: true, + }, + { + name: "GreaterUint64x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightUint64x8", - argLen: 3, + name: "GreaterEqualUint64x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftLeftUint64x8", + name: "GreaterEqualMaskedUint64x8", argLen: 3, generic: true, }, { - name: "MaskedShiftLeftAndFillUpperFromUint64x8", - argLen: 4, + name: "GreaterMaskedUint64x8", + argLen: 3, generic: true, }, { - name: "MaskedShiftRightUint64x8", - argLen: 3, + name: "LessUint64x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightAndFillUpperFromUint64x8", - argLen: 4, + name: "LessEqualUint64x8", + argLen: 2, generic: true, }, { - name: "MaskedShiftRightSignExtendedUint64x8", + name: "LessEqualMaskedUint64x8", argLen: 3, generic: true, }, { - name: "MaskedSubUint64x8", + name: "LessMaskedUint64x8", argLen: 3, generic: true, }, { - name: "MaskedXorUint64x8", - argLen: 3, + name: "MaxUint64x8", + argLen: 2, commutative: true, generic: true, }, { - name: "MaxUint64x8", - argLen: 2, + name: "MaxMaskedUint64x8", + argLen: 3, commutative: true, generic: true, }, @@ -65767,49 +65682,98 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "MinMaskedUint64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "MulEvenWidenUint64x8", argLen: 2, commutative: true, generic: true, }, + { + name: "MulEvenWidenMaskedUint64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "NotEqualUint64x8", argLen: 2, commutative: true, generic: true, }, + { + name: "NotEqualMaskedUint64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "OrUint64x8", argLen: 2, commutative: true, generic: true, }, + { + name: "OrMaskedUint64x8", + argLen: 3, + commutative: true, + generic: true, + }, { name: "PopCountUint64x8", argLen: 1, generic: true, }, + { + name: "PopCountMaskedUint64x8", + argLen: 2, + generic: true, + }, { name: "RotateLeftUint64x8", argLen: 2, generic: true, }, + { + name: "RotateLeftMaskedUint64x8", + argLen: 3, + generic: true, + }, { name: "RotateRightUint64x8", argLen: 2, generic: true, }, + { + name: "RotateRightMaskedUint64x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllLeftUint64x8", argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint64x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint64x8", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint64x8", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint64x8", argLen: 2, @@ -65820,6 +65784,16 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftLeftAndFillUpperFromMaskedUint64x8", + argLen: 4, + generic: true, + }, + { + name: "ShiftLeftMaskedUint64x8", + argLen: 3, + generic: true, + }, { name: "ShiftRightUint64x8", argLen: 2, @@ -65830,28 +65804,60 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftRightAndFillUpperFromMaskedUint64x8", + argLen: 4, + generic: true, + }, + { + name: "ShiftRightMaskedUint64x8", + argLen: 3, + generic: true, + }, { name: "ShiftRightSignExtendedUint64x8", argLen: 2, generic: true, }, + { + name: "ShiftRightSignExtendedMaskedUint64x8", + argLen: 3, + generic: true, + }, { name: "SubUint64x8", argLen: 2, generic: true, }, + { + name: "SubMaskedUint64x8", + argLen: 3, + generic: true, + }, { name: "XorUint64x8", argLen: 2, commutative: true, generic: true, }, + { + name: "XorMaskedUint64x8", + argLen: 3, + commutative: true, + generic: 
true, + }, { name: "AddUint8x16", argLen: 2, commutative: true, generic: true, }, + { + name: "AddMaskedUint8x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AndUint8x16", argLen: 2, @@ -65869,6 +65875,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AverageMaskedUint8x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "EqualUint8x16", argLen: 2, @@ -65876,132 +65888,94 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "GaloisFieldMulUint8x16", - argLen: 2, - generic: true, + name: "EqualMaskedUint8x16", + argLen: 3, + commutative: true, + generic: true, }, { - name: "GreaterUint8x16", + name: "GaloisFieldMulUint8x16", argLen: 2, generic: true, }, { - name: "GreaterEqualUint8x16", - argLen: 2, + name: "GaloisFieldMulMaskedUint8x16", + argLen: 3, generic: true, }, { - name: "LessUint8x16", + name: "GreaterUint8x16", argLen: 2, generic: true, }, { - name: "LessEqualUint8x16", + name: "GreaterEqualUint8x16", argLen: 2, generic: true, }, { - name: "MaskedAddUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedAverageUint8x16", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedEqualUint8x16", - argLen: 3, - commutative: true, - generic: true, + name: "GreaterEqualMaskedUint8x16", + argLen: 3, + generic: true, }, { - name: "MaskedGaloisFieldMulUint8x16", + name: "GreaterMaskedUint8x16", argLen: 3, generic: true, }, { - name: "MaskedGreaterUint8x16", - argLen: 3, + name: "LessUint8x16", + argLen: 2, generic: true, }, { - name: "MaskedGreaterEqualUint8x16", - argLen: 3, + name: "LessEqualUint8x16", + argLen: 2, generic: true, }, { - name: "MaskedLessUint8x16", + name: "LessEqualMaskedUint8x16", argLen: 3, generic: true, }, { - name: "MaskedLessEqualUint8x16", + name: "LessMaskedUint8x16", argLen: 3, generic: true, }, { - name: "MaskedMaxUint8x16", - argLen: 3, + name: "MaxUint8x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinUint8x16", + name: "MaxMaskedUint8x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint8x16", - argLen: 3, + name: "MinUint8x16", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedPopCountUint8x16", - argLen: 2, - generic: true, - }, - { - name: "MaskedSaturatedAddUint8x16", + name: "MinMaskedUint8x16", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedSaturatedSubUint8x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x16", - argLen: 3, - generic: true, - }, - { - name: "MaskedSubUint8x16", - argLen: 3, - generic: true, - }, - { - name: "MaxUint8x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinUint8x16", + name: "NotEqualUint8x16", argLen: 2, commutative: true, generic: true, }, { - name: "NotEqualUint8x16", - argLen: 2, + name: "NotEqualMaskedUint8x16", + argLen: 3, commutative: true, generic: true, }, @@ -66016,27 +65990,53 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedUint8x16", + argLen: 2, + generic: true, + }, { name: "SaturatedAddUint8x16", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedUint8x16", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedSubUint8x16", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedUint8x16", + argLen: 3, + generic: true, + }, { name: "SaturatedUnsignedSignedPairDotProdUint8x16", argLen: 2, generic: true, }, + { + name: 
"SaturatedUnsignedSignedPairDotProdMaskedUint8x16", + argLen: 3, + generic: true, + }, { name: "SubUint8x16", argLen: 2, generic: true, }, + { + name: "SubMaskedUint8x16", + argLen: 3, + generic: true, + }, { name: "XorUint8x16", argLen: 2, @@ -66049,6 +66049,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddMaskedUint8x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AndUint8x32", argLen: 2, @@ -66066,6 +66072,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AverageMaskedUint8x32", + argLen: 3, + commutative: true, + generic: true, + }, { name: "EqualUint8x32", argLen: 2, @@ -66073,132 +66085,94 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "GaloisFieldMulUint8x32", - argLen: 2, - generic: true, + name: "EqualMaskedUint8x32", + argLen: 3, + commutative: true, + generic: true, }, { - name: "GreaterUint8x32", + name: "GaloisFieldMulUint8x32", argLen: 2, generic: true, }, { - name: "GreaterEqualUint8x32", - argLen: 2, + name: "GaloisFieldMulMaskedUint8x32", + argLen: 3, generic: true, }, { - name: "LessUint8x32", + name: "GreaterUint8x32", argLen: 2, generic: true, }, { - name: "LessEqualUint8x32", + name: "GreaterEqualUint8x32", argLen: 2, generic: true, }, { - name: "MaskedAddUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedAverageUint8x32", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedEqualUint8x32", - argLen: 3, - commutative: true, - generic: true, + name: "GreaterEqualMaskedUint8x32", + argLen: 3, + generic: true, }, { - name: "MaskedGaloisFieldMulUint8x32", + name: "GreaterMaskedUint8x32", argLen: 3, generic: true, }, { - name: "MaskedGreaterUint8x32", - argLen: 3, + name: "LessUint8x32", + argLen: 2, generic: true, }, { - name: "MaskedGreaterEqualUint8x32", - argLen: 3, + name: "LessEqualUint8x32", + argLen: 2, generic: true, }, { - name: "MaskedLessUint8x32", + name: "LessEqualMaskedUint8x32", argLen: 3, generic: true, }, { - name: "MaskedLessEqualUint8x32", + name: "LessMaskedUint8x32", argLen: 3, generic: true, }, { - name: "MaskedMaxUint8x32", - argLen: 3, + name: "MaxUint8x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinUint8x32", + name: "MaxMaskedUint8x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint8x32", - argLen: 3, + name: "MinUint8x32", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedPopCountUint8x32", - argLen: 2, - generic: true, - }, - { - name: "MaskedSaturatedAddUint8x32", + name: "MinMaskedUint8x32", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedSaturatedSubUint8x32", - argLen: 3, - generic: true, - }, - { - name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x32", - argLen: 3, - generic: true, - }, - { - name: "MaskedSubUint8x32", - argLen: 3, - generic: true, - }, - { - name: "MaxUint8x32", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "MinUint8x32", + name: "NotEqualUint8x32", argLen: 2, commutative: true, generic: true, }, { - name: "NotEqualUint8x32", - argLen: 2, + name: "NotEqualMaskedUint8x32", + argLen: 3, commutative: true, generic: true, }, @@ -66213,27 +66187,53 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "PopCountMaskedUint8x32", + argLen: 2, + generic: true, + }, { name: "SaturatedAddUint8x32", argLen: 2, commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedUint8x32", + argLen: 3, + commutative: true, + 
generic: true, + }, { name: "SaturatedSubUint8x32", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedUint8x32", + argLen: 3, + generic: true, + }, { name: "SaturatedUnsignedSignedPairDotProdUint8x32", argLen: 2, generic: true, }, + { + name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x32", + argLen: 3, + generic: true, + }, { name: "SubUint8x32", argLen: 2, generic: true, }, + { + name: "SubMaskedUint8x32", + argLen: 3, + generic: true, + }, { name: "XorUint8x32", argLen: 2, @@ -66246,12 +66246,24 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AddMaskedUint8x64", + argLen: 3, + commutative: true, + generic: true, + }, { name: "AverageUint8x64", argLen: 2, commutative: true, generic: true, }, + { + name: "AverageMaskedUint8x64", + argLen: 3, + commutative: true, + generic: true, + }, { name: "EqualUint8x64", argLen: 2, @@ -66259,138 +66271,105 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "GaloisFieldMulUint8x64", - argLen: 2, - generic: true, + name: "EqualMaskedUint8x64", + argLen: 3, + commutative: true, + generic: true, }, { - name: "GreaterUint8x64", + name: "GaloisFieldMulUint8x64", argLen: 2, generic: true, }, { - name: "GreaterEqualUint8x64", - argLen: 2, + name: "GaloisFieldMulMaskedUint8x64", + argLen: 3, generic: true, }, { - name: "LessUint8x64", + name: "GreaterUint8x64", argLen: 2, generic: true, }, { - name: "LessEqualUint8x64", + name: "GreaterEqualUint8x64", argLen: 2, generic: true, }, { - name: "MaskedAddUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedAverageUint8x64", - argLen: 3, - commutative: true, - generic: true, - }, - { - name: "MaskedEqualUint8x64", - argLen: 3, - commutative: true, - generic: true, + name: "GreaterEqualMaskedUint8x64", + argLen: 3, + generic: true, }, { - name: "MaskedGaloisFieldMulUint8x64", + name: "GreaterMaskedUint8x64", argLen: 3, generic: true, }, { - name: "MaskedGreaterUint8x64", - argLen: 3, + name: "LessUint8x64", + argLen: 2, generic: true, }, { - name: "MaskedGreaterEqualUint8x64", - argLen: 3, + name: "LessEqualUint8x64", + argLen: 2, generic: true, }, { - name: "MaskedLessUint8x64", + name: "LessEqualMaskedUint8x64", argLen: 3, generic: true, }, { - name: "MaskedLessEqualUint8x64", + name: "LessMaskedUint8x64", argLen: 3, generic: true, }, { - name: "MaskedMaxUint8x64", - argLen: 3, + name: "MaxUint8x64", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedMinUint8x64", + name: "MaxMaskedUint8x64", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedNotEqualUint8x64", - argLen: 3, + name: "MinUint8x64", + argLen: 2, commutative: true, generic: true, }, { - name: "MaskedPopCountUint8x64", - argLen: 2, - generic: true, - }, - { - name: "MaskedSaturatedAddUint8x64", + name: "MinMaskedUint8x64", argLen: 3, commutative: true, generic: true, }, { - name: "MaskedSaturatedSubUint8x64", - argLen: 3, - generic: true, - }, - { - name: "MaskedSaturatedUnsignedSignedPairDotProdUint8x64", - argLen: 3, - generic: true, - }, - { - name: "MaskedSubUint8x64", - argLen: 3, - generic: true, - }, - { - name: "MaxUint8x64", + name: "NotEqualUint8x64", argLen: 2, commutative: true, generic: true, }, { - name: "MinUint8x64", - argLen: 2, + name: "NotEqualMaskedUint8x64", + argLen: 3, commutative: true, generic: true, }, { - name: "NotEqualUint8x64", - argLen: 2, - commutative: true, - generic: true, + name: "PopCountUint8x64", + argLen: 1, + generic: true, }, { - name: "PopCountUint8x64", - argLen: 1, + name: 
"PopCountMaskedUint8x64", + argLen: 2, generic: true, }, { @@ -66399,101 +66378,110 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "SaturatedAddMaskedUint8x64", + argLen: 3, + commutative: true, + generic: true, + }, { name: "SaturatedSubUint8x64", argLen: 2, generic: true, }, + { + name: "SaturatedSubMaskedUint8x64", + argLen: 3, + generic: true, + }, { name: "SaturatedUnsignedSignedPairDotProdUint8x64", argLen: 2, generic: true, }, { - name: "SubUint8x64", - argLen: 2, + name: "SaturatedUnsignedSignedPairDotProdMaskedUint8x64", + argLen: 3, generic: true, }, { - name: "CeilWithPrecisionFloat32x16", - auxType: auxInt8, - argLen: 1, + name: "SubUint8x64", + argLen: 2, generic: true, }, { - name: "DiffWithCeilWithPrecisionFloat32x16", - auxType: auxInt8, - argLen: 1, + name: "SubMaskedUint8x64", + argLen: 3, generic: true, }, { - name: "DiffWithFloorWithPrecisionFloat32x16", + name: "CeilWithPrecisionFloat32x16", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithRoundWithPrecisionFloat32x16", + name: "CeilWithPrecisionMaskedFloat32x16", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithTruncWithPrecisionFloat32x16", + name: "DiffWithCeilWithPrecisionFloat32x16", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "FloorWithPrecisionFloat32x16", + name: "DiffWithCeilWithPrecisionMaskedFloat32x16", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "MaskedCeilWithPrecisionFloat32x16", + name: "DiffWithFloorWithPrecisionFloat32x16", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithCeilWithPrecisionFloat32x16", + name: "DiffWithFloorWithPrecisionMaskedFloat32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithFloorWithPrecisionFloat32x16", + name: "DiffWithRoundWithPrecisionFloat32x16", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithRoundWithPrecisionFloat32x16", + name: "DiffWithRoundWithPrecisionMaskedFloat32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithTruncWithPrecisionFloat32x16", + name: "DiffWithTruncWithPrecisionFloat32x16", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedFloorWithPrecisionFloat32x16", + name: "DiffWithTruncWithPrecisionMaskedFloat32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRoundWithPrecisionFloat32x16", + name: "FloorWithPrecisionFloat32x16", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedTruncWithPrecisionFloat32x16", + name: "FloorWithPrecisionMaskedFloat32x16", auxType: auxInt8, argLen: 2, generic: true, @@ -66505,91 +66493,91 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "TruncWithPrecisionFloat32x16", + name: "RoundWithPrecisionMaskedFloat32x16", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "CeilWithPrecisionFloat32x4", + name: "TruncWithPrecisionFloat32x16", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithCeilWithPrecisionFloat32x4", + name: "TruncWithPrecisionMaskedFloat32x16", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithFloorWithPrecisionFloat32x4", + name: "CeilWithPrecisionFloat32x4", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithRoundWithPrecisionFloat32x4", + name: "CeilWithPrecisionMaskedFloat32x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithTruncWithPrecisionFloat32x4", + 
name: "DiffWithCeilWithPrecisionFloat32x4", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "FloorWithPrecisionFloat32x4", + name: "DiffWithCeilWithPrecisionMaskedFloat32x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "MaskedCeilWithPrecisionFloat32x4", + name: "DiffWithFloorWithPrecisionFloat32x4", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithCeilWithPrecisionFloat32x4", + name: "DiffWithFloorWithPrecisionMaskedFloat32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithFloorWithPrecisionFloat32x4", + name: "DiffWithRoundWithPrecisionFloat32x4", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithRoundWithPrecisionFloat32x4", + name: "DiffWithRoundWithPrecisionMaskedFloat32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithTruncWithPrecisionFloat32x4", + name: "DiffWithTruncWithPrecisionFloat32x4", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedFloorWithPrecisionFloat32x4", + name: "DiffWithTruncWithPrecisionMaskedFloat32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRoundWithPrecisionFloat32x4", + name: "FloorWithPrecisionFloat32x4", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedTruncWithPrecisionFloat32x4", + name: "FloorWithPrecisionMaskedFloat32x4", auxType: auxInt8, argLen: 2, generic: true, @@ -66601,99 +66589,99 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "TruncWithPrecisionFloat32x4", + name: "RoundWithPrecisionMaskedFloat32x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "CeilWithPrecisionFloat32x8", + name: "TruncWithPrecisionFloat32x4", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithCeilWithPrecisionFloat32x8", + name: "TruncWithPrecisionMaskedFloat32x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithFloorWithPrecisionFloat32x8", + name: "CeilWithPrecisionFloat32x8", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithRoundWithPrecisionFloat32x8", + name: "CeilWithPrecisionMaskedFloat32x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithTruncWithPrecisionFloat32x8", + name: "DiffWithCeilWithPrecisionFloat32x8", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "FloorWithPrecisionFloat32x8", + name: "DiffWithCeilWithPrecisionMaskedFloat32x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "Get128Float32x8", + name: "DiffWithFloorWithPrecisionFloat32x8", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "MaskedCeilWithPrecisionFloat32x8", + name: "DiffWithFloorWithPrecisionMaskedFloat32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithCeilWithPrecisionFloat32x8", + name: "DiffWithRoundWithPrecisionFloat32x8", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithFloorWithPrecisionFloat32x8", + name: "DiffWithRoundWithPrecisionMaskedFloat32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithRoundWithPrecisionFloat32x8", + name: "DiffWithTruncWithPrecisionFloat32x8", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithTruncWithPrecisionFloat32x8", + name: "DiffWithTruncWithPrecisionMaskedFloat32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedFloorWithPrecisionFloat32x8", + name: "FloorWithPrecisionFloat32x8", 
auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedRoundWithPrecisionFloat32x8", + name: "FloorWithPrecisionMaskedFloat32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedTruncWithPrecisionFloat32x8", + name: "Get128Float32x8", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { @@ -66703,97 +66691,97 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Set128Float32x8", + name: "RoundWithPrecisionMaskedFloat32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "TruncWithPrecisionFloat32x8", + name: "Set128Float32x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "CeilWithPrecisionFloat64x2", + name: "TruncWithPrecisionFloat32x8", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithCeilWithPrecisionFloat64x2", + name: "TruncWithPrecisionMaskedFloat32x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithFloorWithPrecisionFloat64x2", + name: "CeilWithPrecisionFloat64x2", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithRoundWithPrecisionFloat64x2", + name: "CeilWithPrecisionMaskedFloat64x2", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithTruncWithPrecisionFloat64x2", + name: "DiffWithCeilWithPrecisionFloat64x2", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "FloorWithPrecisionFloat64x2", + name: "DiffWithCeilWithPrecisionMaskedFloat64x2", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "MaskedCeilWithPrecisionFloat64x2", + name: "DiffWithFloorWithPrecisionFloat64x2", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithCeilWithPrecisionFloat64x2", + name: "DiffWithFloorWithPrecisionMaskedFloat64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithFloorWithPrecisionFloat64x2", + name: "DiffWithRoundWithPrecisionFloat64x2", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithRoundWithPrecisionFloat64x2", + name: "DiffWithRoundWithPrecisionMaskedFloat64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithTruncWithPrecisionFloat64x2", + name: "DiffWithTruncWithPrecisionFloat64x2", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedFloorWithPrecisionFloat64x2", + name: "DiffWithTruncWithPrecisionMaskedFloat64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRoundWithPrecisionFloat64x2", + name: "FloorWithPrecisionFloat64x2", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedTruncWithPrecisionFloat64x2", + name: "FloorWithPrecisionMaskedFloat64x2", auxType: auxInt8, argLen: 2, generic: true, @@ -66805,99 +66793,99 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "TruncWithPrecisionFloat64x2", + name: "RoundWithPrecisionMaskedFloat64x2", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "CeilWithPrecisionFloat64x4", + name: "TruncWithPrecisionFloat64x2", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithCeilWithPrecisionFloat64x4", + name: "TruncWithPrecisionMaskedFloat64x2", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithFloorWithPrecisionFloat64x4", + name: "CeilWithPrecisionFloat64x4", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithRoundWithPrecisionFloat64x4", + name: "CeilWithPrecisionMaskedFloat64x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - 
name: "DiffWithTruncWithPrecisionFloat64x4", + name: "DiffWithCeilWithPrecisionFloat64x4", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "FloorWithPrecisionFloat64x4", + name: "DiffWithCeilWithPrecisionMaskedFloat64x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "Get128Float64x4", + name: "DiffWithFloorWithPrecisionFloat64x4", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "MaskedCeilWithPrecisionFloat64x4", + name: "DiffWithFloorWithPrecisionMaskedFloat64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithCeilWithPrecisionFloat64x4", + name: "DiffWithRoundWithPrecisionFloat64x4", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithFloorWithPrecisionFloat64x4", + name: "DiffWithRoundWithPrecisionMaskedFloat64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithRoundWithPrecisionFloat64x4", + name: "DiffWithTruncWithPrecisionFloat64x4", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithTruncWithPrecisionFloat64x4", + name: "DiffWithTruncWithPrecisionMaskedFloat64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedFloorWithPrecisionFloat64x4", + name: "FloorWithPrecisionFloat64x4", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedRoundWithPrecisionFloat64x4", + name: "FloorWithPrecisionMaskedFloat64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedTruncWithPrecisionFloat64x4", + name: "Get128Float64x4", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { @@ -66907,97 +66895,97 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Set128Float64x4", + name: "RoundWithPrecisionMaskedFloat64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "TruncWithPrecisionFloat64x4", + name: "Set128Float64x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "CeilWithPrecisionFloat64x8", + name: "TruncWithPrecisionFloat64x4", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithCeilWithPrecisionFloat64x8", + name: "TruncWithPrecisionMaskedFloat64x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithFloorWithPrecisionFloat64x8", + name: "CeilWithPrecisionFloat64x8", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "DiffWithRoundWithPrecisionFloat64x8", + name: "CeilWithPrecisionMaskedFloat64x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "DiffWithTruncWithPrecisionFloat64x8", + name: "DiffWithCeilWithPrecisionFloat64x8", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "FloorWithPrecisionFloat64x8", + name: "DiffWithCeilWithPrecisionMaskedFloat64x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "MaskedCeilWithPrecisionFloat64x8", + name: "DiffWithFloorWithPrecisionFloat64x8", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithCeilWithPrecisionFloat64x8", + name: "DiffWithFloorWithPrecisionMaskedFloat64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithFloorWithPrecisionFloat64x8", + name: "DiffWithRoundWithPrecisionFloat64x8", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedDiffWithRoundWithPrecisionFloat64x8", + name: "DiffWithRoundWithPrecisionMaskedFloat64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedDiffWithTruncWithPrecisionFloat64x8", + name: "DiffWithTruncWithPrecisionFloat64x8", 
auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedFloorWithPrecisionFloat64x8", + name: "DiffWithTruncWithPrecisionMaskedFloat64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRoundWithPrecisionFloat64x8", + name: "FloorWithPrecisionFloat64x8", auxType: auxInt8, - argLen: 2, + argLen: 1, generic: true, }, { - name: "MaskedTruncWithPrecisionFloat64x8", + name: "FloorWithPrecisionMaskedFloat64x8", auxType: auxInt8, argLen: 2, generic: true, @@ -67009,27 +66997,27 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "TruncWithPrecisionFloat64x8", + name: "RoundWithPrecisionMaskedFloat64x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "Get128Int16x16", + name: "TruncWithPrecisionFloat64x8", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", + name: "TruncWithPrecisionMaskedFloat64x8", auxType: auxInt8, - argLen: 3, + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt16x16", + name: "Get128Int16x16", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { @@ -67045,19 +67033,19 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt16x16", + name: "ShiftAllLeftAndFillUpperFromMaskedInt16x16", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt16x32", + name: "ShiftAllRightAndFillUpperFromInt16x16", auxType: auxInt8, - argLen: 3, + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt16x32", + name: "ShiftAllRightAndFillUpperFromMaskedInt16x16", auxType: auxInt8, argLen: 3, generic: true, @@ -67069,27 +67057,27 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt16x32", + name: "ShiftAllLeftAndFillUpperFromMaskedInt16x32", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "GetElemInt16x8", + name: "ShiftAllRightAndFillUpperFromInt16x32", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt16x8", + name: "ShiftAllRightAndFillUpperFromMaskedInt16x32", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt16x8", + name: "GetElemInt16x8", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { @@ -67105,39 +67093,33 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt16x8", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "MaskedRotateAllLeftInt32x16", + name: "ShiftAllLeftAndFillUpperFromMaskedInt16x8", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllRightInt32x16", + name: "ShiftAllRightAndFillUpperFromInt16x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt32x16", + name: "ShiftAllRightAndFillUpperFromMaskedInt16x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt32x16", + name: "RotateAllLeftInt32x16", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftInt32x16", + name: "RotateAllLeftMaskedInt32x16", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67147,51 +67129,51 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt32x16", + name: "RotateAllRightMaskedInt32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: 
"ShiftAllRightAndFillUpperFromInt32x16", + name: "ShiftAllLeftAndFillUpperFromInt32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "GetElemInt32x4", + name: "ShiftAllLeftAndFillUpperFromMaskedInt32x16", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllLeftInt32x4", + name: "ShiftAllRightAndFillUpperFromInt32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllRightInt32x4", + name: "ShiftAllRightAndFillUpperFromMaskedInt32x16", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt32x4", + name: "GetElemInt32x4", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt32x4", + name: "RotateAllLeftInt32x4", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftInt32x4", + name: "RotateAllLeftMaskedInt32x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67201,57 +67183,57 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "SetElemInt32x4", + name: "RotateAllRightMaskedInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt32x4", + name: "SetElemInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt32x4", + name: "ShiftAllLeftAndFillUpperFromInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "Get128Int32x8", + name: "ShiftAllLeftAndFillUpperFromMaskedInt32x4", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllLeftInt32x8", + name: "ShiftAllRightAndFillUpperFromInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllRightInt32x8", + name: "ShiftAllRightAndFillUpperFromMaskedInt32x4", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", + name: "Get128Int32x8", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt32x8", + name: "RotateAllLeftInt32x8", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftInt32x8", + name: "RotateAllLeftMaskedInt32x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67261,57 +67243,57 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Set128Int32x8", + name: "RotateAllRightMaskedInt32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt32x8", + name: "Set128Int32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt32x8", + name: "ShiftAllLeftAndFillUpperFromInt32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "GetElemInt64x2", + name: "ShiftAllLeftAndFillUpperFromMaskedInt32x8", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllLeftInt64x2", + name: "ShiftAllRightAndFillUpperFromInt32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllRightInt64x2", + name: "ShiftAllRightAndFillUpperFromMaskedInt32x8", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt64x2", + name: "GetElemInt64x2", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt64x2", + name: "RotateAllLeftInt64x2", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftInt64x2", + name: 
"RotateAllLeftMaskedInt64x2", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67321,57 +67303,57 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "SetElemInt64x2", + name: "RotateAllRightMaskedInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt64x2", + name: "SetElemInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt64x2", + name: "ShiftAllLeftAndFillUpperFromInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "Get128Int64x4", + name: "ShiftAllLeftAndFillUpperFromMaskedInt64x2", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllLeftInt64x4", + name: "ShiftAllRightAndFillUpperFromInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllRightInt64x4", + name: "ShiftAllRightAndFillUpperFromMaskedInt64x2", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", + name: "Get128Int64x4", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt64x4", + name: "RotateAllLeftInt64x4", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftInt64x4", + name: "RotateAllLeftMaskedInt64x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67381,51 +67363,51 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Set128Int64x4", + name: "RotateAllRightMaskedInt64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt64x4", + name: "Set128Int64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt64x4", + name: "ShiftAllLeftAndFillUpperFromInt64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllLeftInt64x8", + name: "ShiftAllLeftAndFillUpperFromMaskedInt64x4", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllRightInt64x8", + name: "ShiftAllRightAndFillUpperFromInt64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromInt64x8", + name: "ShiftAllRightAndFillUpperFromMaskedInt64x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromInt64x8", + name: "RotateAllLeftInt64x8", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftInt64x8", + name: "RotateAllLeftMaskedInt64x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67435,57 +67417,63 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt64x8", + name: "RotateAllRightMaskedInt64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt64x8", + name: "ShiftAllLeftAndFillUpperFromInt64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "GetElemInt8x16", + name: "ShiftAllLeftAndFillUpperFromMaskedInt64x8", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "SetElemInt8x16", + name: "ShiftAllRightAndFillUpperFromInt64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "Get128Int8x32", + name: "ShiftAllRightAndFillUpperFromMaskedInt64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, + { + name: "GetElemInt8x16", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "Set128Int8x32", + name: "SetElemInt8x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: 
"Get128Uint16x16", + name: "Get128Int8x32", auxType: auxInt8, argLen: 1, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", + name: "Set128Int8x32", auxType: auxInt8, - argLen: 3, + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromUint16x16", + name: "Get128Uint16x16", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { @@ -67501,19 +67489,19 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint16x16", + name: "ShiftAllLeftAndFillUpperFromMaskedUint16x16", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint16x32", + name: "ShiftAllRightAndFillUpperFromUint16x16", auxType: auxInt8, - argLen: 3, + argLen: 2, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromUint16x32", + name: "ShiftAllRightAndFillUpperFromMaskedUint16x16", auxType: auxInt8, argLen: 3, generic: true, @@ -67525,27 +67513,27 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint16x32", + name: "ShiftAllLeftAndFillUpperFromMaskedUint16x32", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "GetElemUint16x8", + name: "ShiftAllRightAndFillUpperFromUint16x32", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint16x8", + name: "ShiftAllRightAndFillUpperFromMaskedUint16x32", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromUint16x8", + name: "GetElemUint16x8", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { @@ -67561,39 +67549,33 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint16x8", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "MaskedRotateAllLeftUint32x16", + name: "ShiftAllLeftAndFillUpperFromMaskedUint16x8", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllRightUint32x16", + name: "ShiftAllRightAndFillUpperFromUint16x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint32x16", + name: "ShiftAllRightAndFillUpperFromMaskedUint16x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromUint32x16", + name: "RotateAllLeftUint32x16", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftUint32x16", + name: "RotateAllLeftMaskedUint32x16", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67603,51 +67585,51 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint32x16", + name: "RotateAllRightMaskedUint32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint32x16", + name: "ShiftAllLeftAndFillUpperFromUint32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "GetElemUint32x4", + name: "ShiftAllLeftAndFillUpperFromMaskedUint32x16", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllLeftUint32x4", + name: "ShiftAllRightAndFillUpperFromUint32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllRightUint32x4", + name: "ShiftAllRightAndFillUpperFromMaskedUint32x16", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint32x4", + name: "GetElemUint32x4", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: 
"MaskedShiftAllRightAndFillUpperFromUint32x4", + name: "RotateAllLeftUint32x4", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftUint32x4", + name: "RotateAllLeftMaskedUint32x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67657,57 +67639,57 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "SetElemUint32x4", + name: "RotateAllRightMaskedUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint32x4", + name: "SetElemUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint32x4", + name: "ShiftAllLeftAndFillUpperFromUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "Get128Uint32x8", + name: "ShiftAllLeftAndFillUpperFromMaskedUint32x4", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllLeftUint32x8", + name: "ShiftAllRightAndFillUpperFromUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllRightUint32x8", + name: "ShiftAllRightAndFillUpperFromMaskedUint32x4", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", + name: "Get128Uint32x8", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromUint32x8", + name: "RotateAllLeftUint32x8", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftUint32x8", + name: "RotateAllLeftMaskedUint32x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67717,57 +67699,57 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Set128Uint32x8", + name: "RotateAllRightMaskedUint32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint32x8", + name: "Set128Uint32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint32x8", + name: "ShiftAllLeftAndFillUpperFromUint32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "GetElemUint64x2", + name: "ShiftAllLeftAndFillUpperFromMaskedUint32x8", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllLeftUint64x2", + name: "ShiftAllRightAndFillUpperFromUint32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllRightUint64x2", + name: "ShiftAllRightAndFillUpperFromMaskedUint32x8", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint64x2", + name: "GetElemUint64x2", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromUint64x2", + name: "RotateAllLeftUint64x2", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftUint64x2", + name: "RotateAllLeftMaskedUint64x2", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67777,57 +67759,57 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "SetElemUint64x2", + name: "RotateAllRightMaskedUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint64x2", + name: "SetElemUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint64x2", + name: "ShiftAllLeftAndFillUpperFromUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "Get128Uint64x4", + name: "ShiftAllLeftAndFillUpperFromMaskedUint64x2", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, 
{ - name: "MaskedRotateAllLeftUint64x4", + name: "ShiftAllRightAndFillUpperFromUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllRightUint64x4", + name: "ShiftAllRightAndFillUpperFromMaskedUint64x2", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", + name: "Get128Uint64x4", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromUint64x4", + name: "RotateAllLeftUint64x4", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftUint64x4", + name: "RotateAllLeftMaskedUint64x4", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67837,51 +67819,51 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Set128Uint64x4", + name: "RotateAllRightMaskedUint64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint64x4", + name: "Set128Uint64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint64x4", + name: "ShiftAllLeftAndFillUpperFromUint64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedRotateAllLeftUint64x8", + name: "ShiftAllLeftAndFillUpperFromMaskedUint64x4", auxType: auxInt8, - argLen: 2, + argLen: 3, generic: true, }, { - name: "MaskedRotateAllRightUint64x8", + name: "ShiftAllRightAndFillUpperFromUint64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "MaskedShiftAllLeftAndFillUpperFromUint64x8", + name: "ShiftAllRightAndFillUpperFromMaskedUint64x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "MaskedShiftAllRightAndFillUpperFromUint64x8", + name: "RotateAllLeftUint64x8", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { - name: "RotateAllLeftUint64x8", + name: "RotateAllLeftMaskedUint64x8", auxType: auxInt8, - argLen: 1, + argLen: 2, generic: true, }, { @@ -67890,18 +67872,36 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "RotateAllRightMaskedUint64x8", + auxType: auxInt8, + argLen: 2, + generic: true, + }, { name: "ShiftAllLeftAndFillUpperFromUint64x8", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllLeftAndFillUpperFromMaskedUint64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, { name: "ShiftAllRightAndFillUpperFromUint64x8", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "ShiftAllRightAndFillUpperFromMaskedUint64x8", + auxType: auxInt8, + argLen: 3, + generic: true, + }, { name: "GaloisFieldAffineTransformUint8x16", auxType: auxInt8, @@ -67915,21 +67915,21 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "GetElemUint8x16", + name: "GaloisFieldAffineTransformInversedMaskedUint8x16", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedGaloisFieldAffineTransformUint8x16", + name: "GaloisFieldAffineTransformMaskedUint8x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "MaskedGaloisFieldAffineTransformInversedUint8x16", + name: "GetElemUint8x16", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { @@ -67951,21 +67951,21 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "Get128Uint8x32", + name: "GaloisFieldAffineTransformInversedMaskedUint8x32", auxType: auxInt8, - argLen: 1, + argLen: 3, generic: true, }, { - name: "MaskedGaloisFieldAffineTransformUint8x32", + name: "GaloisFieldAffineTransformMaskedUint8x32", auxType: auxInt8, argLen: 3, generic: true, }, { - name: 
"MaskedGaloisFieldAffineTransformInversedUint8x32", + name: "Get128Uint8x32", auxType: auxInt8, - argLen: 3, + argLen: 1, generic: true, }, { @@ -67987,13 +67987,13 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "MaskedGaloisFieldAffineTransformUint8x64", + name: "GaloisFieldAffineTransformInversedMaskedUint8x64", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "MaskedGaloisFieldAffineTransformInversedUint8x64", + name: "GaloisFieldAffineTransformMaskedUint8x64", auxType: auxInt8, argLen: 3, generic: true, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 2e6a9dfaec..2e27077e81 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -589,6 +589,30 @@ func rewriteValueAMD64(v *Value) bool { case OpAbsoluteInt8x64: v.Op = OpAMD64VPABSB512 return true + case OpAbsoluteMaskedInt16x16: + return rewriteValueAMD64_OpAbsoluteMaskedInt16x16(v) + case OpAbsoluteMaskedInt16x32: + return rewriteValueAMD64_OpAbsoluteMaskedInt16x32(v) + case OpAbsoluteMaskedInt16x8: + return rewriteValueAMD64_OpAbsoluteMaskedInt16x8(v) + case OpAbsoluteMaskedInt32x16: + return rewriteValueAMD64_OpAbsoluteMaskedInt32x16(v) + case OpAbsoluteMaskedInt32x4: + return rewriteValueAMD64_OpAbsoluteMaskedInt32x4(v) + case OpAbsoluteMaskedInt32x8: + return rewriteValueAMD64_OpAbsoluteMaskedInt32x8(v) + case OpAbsoluteMaskedInt64x2: + return rewriteValueAMD64_OpAbsoluteMaskedInt64x2(v) + case OpAbsoluteMaskedInt64x4: + return rewriteValueAMD64_OpAbsoluteMaskedInt64x4(v) + case OpAbsoluteMaskedInt64x8: + return rewriteValueAMD64_OpAbsoluteMaskedInt64x8(v) + case OpAbsoluteMaskedInt8x16: + return rewriteValueAMD64_OpAbsoluteMaskedInt8x16(v) + case OpAbsoluteMaskedInt8x32: + return rewriteValueAMD64_OpAbsoluteMaskedInt8x32(v) + case OpAbsoluteMaskedInt8x64: + return rewriteValueAMD64_OpAbsoluteMaskedInt8x64(v) case OpAdd16: v.Op = OpAMD64ADDL return true @@ -661,6 +685,66 @@ func rewriteValueAMD64(v *Value) bool { case OpAddInt8x64: v.Op = OpAMD64VPADDB512 return true + case OpAddMaskedFloat32x16: + return rewriteValueAMD64_OpAddMaskedFloat32x16(v) + case OpAddMaskedFloat32x4: + return rewriteValueAMD64_OpAddMaskedFloat32x4(v) + case OpAddMaskedFloat32x8: + return rewriteValueAMD64_OpAddMaskedFloat32x8(v) + case OpAddMaskedFloat64x2: + return rewriteValueAMD64_OpAddMaskedFloat64x2(v) + case OpAddMaskedFloat64x4: + return rewriteValueAMD64_OpAddMaskedFloat64x4(v) + case OpAddMaskedFloat64x8: + return rewriteValueAMD64_OpAddMaskedFloat64x8(v) + case OpAddMaskedInt16x16: + return rewriteValueAMD64_OpAddMaskedInt16x16(v) + case OpAddMaskedInt16x32: + return rewriteValueAMD64_OpAddMaskedInt16x32(v) + case OpAddMaskedInt16x8: + return rewriteValueAMD64_OpAddMaskedInt16x8(v) + case OpAddMaskedInt32x16: + return rewriteValueAMD64_OpAddMaskedInt32x16(v) + case OpAddMaskedInt32x4: + return rewriteValueAMD64_OpAddMaskedInt32x4(v) + case OpAddMaskedInt32x8: + return rewriteValueAMD64_OpAddMaskedInt32x8(v) + case OpAddMaskedInt64x2: + return rewriteValueAMD64_OpAddMaskedInt64x2(v) + case OpAddMaskedInt64x4: + return rewriteValueAMD64_OpAddMaskedInt64x4(v) + case OpAddMaskedInt64x8: + return rewriteValueAMD64_OpAddMaskedInt64x8(v) + case OpAddMaskedInt8x16: + return rewriteValueAMD64_OpAddMaskedInt8x16(v) + case OpAddMaskedInt8x32: + return rewriteValueAMD64_OpAddMaskedInt8x32(v) + case OpAddMaskedInt8x64: + return rewriteValueAMD64_OpAddMaskedInt8x64(v) + case OpAddMaskedUint16x16: + return 
rewriteValueAMD64_OpAddMaskedUint16x16(v) + case OpAddMaskedUint16x32: + return rewriteValueAMD64_OpAddMaskedUint16x32(v) + case OpAddMaskedUint16x8: + return rewriteValueAMD64_OpAddMaskedUint16x8(v) + case OpAddMaskedUint32x16: + return rewriteValueAMD64_OpAddMaskedUint32x16(v) + case OpAddMaskedUint32x4: + return rewriteValueAMD64_OpAddMaskedUint32x4(v) + case OpAddMaskedUint32x8: + return rewriteValueAMD64_OpAddMaskedUint32x8(v) + case OpAddMaskedUint64x2: + return rewriteValueAMD64_OpAddMaskedUint64x2(v) + case OpAddMaskedUint64x4: + return rewriteValueAMD64_OpAddMaskedUint64x4(v) + case OpAddMaskedUint64x8: + return rewriteValueAMD64_OpAddMaskedUint64x8(v) + case OpAddMaskedUint8x16: + return rewriteValueAMD64_OpAddMaskedUint8x16(v) + case OpAddMaskedUint8x32: + return rewriteValueAMD64_OpAddMaskedUint8x32(v) + case OpAddMaskedUint8x64: + return rewriteValueAMD64_OpAddMaskedUint8x64(v) case OpAddPtr: v.Op = OpAMD64ADDQ return true @@ -759,6 +843,30 @@ func rewriteValueAMD64(v *Value) bool { case OpAndInt8x32: v.Op = OpAMD64VPAND256 return true + case OpAndMaskedInt32x16: + return rewriteValueAMD64_OpAndMaskedInt32x16(v) + case OpAndMaskedInt32x4: + return rewriteValueAMD64_OpAndMaskedInt32x4(v) + case OpAndMaskedInt32x8: + return rewriteValueAMD64_OpAndMaskedInt32x8(v) + case OpAndMaskedInt64x2: + return rewriteValueAMD64_OpAndMaskedInt64x2(v) + case OpAndMaskedInt64x4: + return rewriteValueAMD64_OpAndMaskedInt64x4(v) + case OpAndMaskedInt64x8: + return rewriteValueAMD64_OpAndMaskedInt64x8(v) + case OpAndMaskedUint32x16: + return rewriteValueAMD64_OpAndMaskedUint32x16(v) + case OpAndMaskedUint32x4: + return rewriteValueAMD64_OpAndMaskedUint32x4(v) + case OpAndMaskedUint32x8: + return rewriteValueAMD64_OpAndMaskedUint32x8(v) + case OpAndMaskedUint64x2: + return rewriteValueAMD64_OpAndMaskedUint64x2(v) + case OpAndMaskedUint64x4: + return rewriteValueAMD64_OpAndMaskedUint64x4(v) + case OpAndMaskedUint64x8: + return rewriteValueAMD64_OpAndMaskedUint64x8(v) case OpAndNotInt16x16: v.Op = OpAMD64VPANDN256 return true @@ -789,6 +897,30 @@ func rewriteValueAMD64(v *Value) bool { case OpAndNotInt8x32: v.Op = OpAMD64VPANDN256 return true + case OpAndNotMaskedInt32x16: + return rewriteValueAMD64_OpAndNotMaskedInt32x16(v) + case OpAndNotMaskedInt32x4: + return rewriteValueAMD64_OpAndNotMaskedInt32x4(v) + case OpAndNotMaskedInt32x8: + return rewriteValueAMD64_OpAndNotMaskedInt32x8(v) + case OpAndNotMaskedInt64x2: + return rewriteValueAMD64_OpAndNotMaskedInt64x2(v) + case OpAndNotMaskedInt64x4: + return rewriteValueAMD64_OpAndNotMaskedInt64x4(v) + case OpAndNotMaskedInt64x8: + return rewriteValueAMD64_OpAndNotMaskedInt64x8(v) + case OpAndNotMaskedUint32x16: + return rewriteValueAMD64_OpAndNotMaskedUint32x16(v) + case OpAndNotMaskedUint32x4: + return rewriteValueAMD64_OpAndNotMaskedUint32x4(v) + case OpAndNotMaskedUint32x8: + return rewriteValueAMD64_OpAndNotMaskedUint32x8(v) + case OpAndNotMaskedUint64x2: + return rewriteValueAMD64_OpAndNotMaskedUint64x2(v) + case OpAndNotMaskedUint64x4: + return rewriteValueAMD64_OpAndNotMaskedUint64x4(v) + case OpAndNotMaskedUint64x8: + return rewriteValueAMD64_OpAndNotMaskedUint64x8(v) case OpAndNotUint16x16: v.Op = OpAMD64VPANDN256 return true @@ -867,6 +999,18 @@ func rewriteValueAMD64(v *Value) bool { case OpApproximateReciprocalFloat64x8: v.Op = OpAMD64VRCP14PD512 return true + case OpApproximateReciprocalMaskedFloat32x16: + return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x16(v) + case OpApproximateReciprocalMaskedFloat32x4: + return 
rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x4(v) + case OpApproximateReciprocalMaskedFloat32x8: + return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x8(v) + case OpApproximateReciprocalMaskedFloat64x2: + return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x2(v) + case OpApproximateReciprocalMaskedFloat64x4: + return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x4(v) + case OpApproximateReciprocalMaskedFloat64x8: + return rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x8(v) case OpApproximateReciprocalOfSqrtFloat32x16: v.Op = OpAMD64VRSQRT14PS512 return true @@ -885,6 +1029,18 @@ func rewriteValueAMD64(v *Value) bool { case OpApproximateReciprocalOfSqrtFloat64x8: v.Op = OpAMD64VRSQRT14PD512 return true + case OpApproximateReciprocalOfSqrtMaskedFloat32x16: + return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x16(v) + case OpApproximateReciprocalOfSqrtMaskedFloat32x4: + return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x4(v) + case OpApproximateReciprocalOfSqrtMaskedFloat32x8: + return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x8(v) + case OpApproximateReciprocalOfSqrtMaskedFloat64x2: + return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x2(v) + case OpApproximateReciprocalOfSqrtMaskedFloat64x4: + return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x4(v) + case OpApproximateReciprocalOfSqrtMaskedFloat64x8: + return rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x8(v) case OpAtomicAdd32: return rewriteValueAMD64_OpAtomicAdd32(v) case OpAtomicAdd64: @@ -931,6 +1087,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAtomicStore8(v) case OpAtomicStorePtrNoWB: return rewriteValueAMD64_OpAtomicStorePtrNoWB(v) + case OpAverageMaskedUint16x16: + return rewriteValueAMD64_OpAverageMaskedUint16x16(v) + case OpAverageMaskedUint16x32: + return rewriteValueAMD64_OpAverageMaskedUint16x32(v) + case OpAverageMaskedUint16x8: + return rewriteValueAMD64_OpAverageMaskedUint16x8(v) + case OpAverageMaskedUint8x16: + return rewriteValueAMD64_OpAverageMaskedUint8x16(v) + case OpAverageMaskedUint8x32: + return rewriteValueAMD64_OpAverageMaskedUint8x32(v) + case OpAverageMaskedUint8x64: + return rewriteValueAMD64_OpAverageMaskedUint8x64(v) case OpAverageUint16x16: v.Op = OpAMD64VPAVGW256 return true @@ -990,6 +1158,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v) case OpCeilWithPrecisionFloat64x8: return rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v) + case OpCeilWithPrecisionMaskedFloat32x16: + return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x16(v) + case OpCeilWithPrecisionMaskedFloat32x4: + return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x4(v) + case OpCeilWithPrecisionMaskedFloat32x8: + return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x8(v) + case OpCeilWithPrecisionMaskedFloat64x2: + return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x2(v) + case OpCeilWithPrecisionMaskedFloat64x4: + return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x4(v) + case OpCeilWithPrecisionMaskedFloat64x8: + return rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x8(v) case OpClosureCall: v.Op = OpAMD64CALLclosure return true @@ -1088,6 +1268,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v) case OpDiffWithCeilWithPrecisionFloat64x8: return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v) + case 
OpDiffWithCeilWithPrecisionMaskedFloat32x16: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x16(v) + case OpDiffWithCeilWithPrecisionMaskedFloat32x4: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x4(v) + case OpDiffWithCeilWithPrecisionMaskedFloat32x8: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x8(v) + case OpDiffWithCeilWithPrecisionMaskedFloat64x2: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x2(v) + case OpDiffWithCeilWithPrecisionMaskedFloat64x4: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x4(v) + case OpDiffWithCeilWithPrecisionMaskedFloat64x8: + return rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x8(v) case OpDiffWithFloorWithPrecisionFloat32x16: return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v) case OpDiffWithFloorWithPrecisionFloat32x4: @@ -1100,6 +1292,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v) case OpDiffWithFloorWithPrecisionFloat64x8: return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v) + case OpDiffWithFloorWithPrecisionMaskedFloat32x16: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x16(v) + case OpDiffWithFloorWithPrecisionMaskedFloat32x4: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x4(v) + case OpDiffWithFloorWithPrecisionMaskedFloat32x8: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x8(v) + case OpDiffWithFloorWithPrecisionMaskedFloat64x2: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x2(v) + case OpDiffWithFloorWithPrecisionMaskedFloat64x4: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x4(v) + case OpDiffWithFloorWithPrecisionMaskedFloat64x8: + return rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x8(v) case OpDiffWithRoundWithPrecisionFloat32x16: return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v) case OpDiffWithRoundWithPrecisionFloat32x4: @@ -1112,6 +1316,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v) case OpDiffWithRoundWithPrecisionFloat64x8: return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v) + case OpDiffWithRoundWithPrecisionMaskedFloat32x16: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x16(v) + case OpDiffWithRoundWithPrecisionMaskedFloat32x4: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x4(v) + case OpDiffWithRoundWithPrecisionMaskedFloat32x8: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x8(v) + case OpDiffWithRoundWithPrecisionMaskedFloat64x2: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x2(v) + case OpDiffWithRoundWithPrecisionMaskedFloat64x4: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x4(v) + case OpDiffWithRoundWithPrecisionMaskedFloat64x8: + return rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x8(v) case OpDiffWithTruncWithPrecisionFloat32x16: return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v) case OpDiffWithTruncWithPrecisionFloat32x4: @@ -1124,6 +1340,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v) case OpDiffWithTruncWithPrecisionFloat64x8: return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v) + case OpDiffWithTruncWithPrecisionMaskedFloat32x16: + return 
rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x16(v) + case OpDiffWithTruncWithPrecisionMaskedFloat32x4: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x4(v) + case OpDiffWithTruncWithPrecisionMaskedFloat32x8: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x8(v) + case OpDiffWithTruncWithPrecisionMaskedFloat64x2: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x2(v) + case OpDiffWithTruncWithPrecisionMaskedFloat64x4: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x4(v) + case OpDiffWithTruncWithPrecisionMaskedFloat64x8: + return rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x8(v) case OpDiv128u: v.Op = OpAMD64DIVQU2 return true @@ -1167,6 +1395,18 @@ func rewriteValueAMD64(v *Value) bool { case OpDivFloat64x8: v.Op = OpAMD64VDIVPD512 return true + case OpDivMaskedFloat32x16: + return rewriteValueAMD64_OpDivMaskedFloat32x16(v) + case OpDivMaskedFloat32x4: + return rewriteValueAMD64_OpDivMaskedFloat32x4(v) + case OpDivMaskedFloat32x8: + return rewriteValueAMD64_OpDivMaskedFloat32x8(v) + case OpDivMaskedFloat64x2: + return rewriteValueAMD64_OpDivMaskedFloat64x2(v) + case OpDivMaskedFloat64x4: + return rewriteValueAMD64_OpDivMaskedFloat64x4(v) + case OpDivMaskedFloat64x8: + return rewriteValueAMD64_OpDivMaskedFloat64x8(v) case OpDotProdBroadcastFloat64x2: return rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v) case OpEq16: @@ -1229,6 +1469,66 @@ func rewriteValueAMD64(v *Value) bool { return true case OpEqualInt8x64: return rewriteValueAMD64_OpEqualInt8x64(v) + case OpEqualMaskedFloat32x16: + return rewriteValueAMD64_OpEqualMaskedFloat32x16(v) + case OpEqualMaskedFloat32x4: + return rewriteValueAMD64_OpEqualMaskedFloat32x4(v) + case OpEqualMaskedFloat32x8: + return rewriteValueAMD64_OpEqualMaskedFloat32x8(v) + case OpEqualMaskedFloat64x2: + return rewriteValueAMD64_OpEqualMaskedFloat64x2(v) + case OpEqualMaskedFloat64x4: + return rewriteValueAMD64_OpEqualMaskedFloat64x4(v) + case OpEqualMaskedFloat64x8: + return rewriteValueAMD64_OpEqualMaskedFloat64x8(v) + case OpEqualMaskedInt16x16: + return rewriteValueAMD64_OpEqualMaskedInt16x16(v) + case OpEqualMaskedInt16x32: + return rewriteValueAMD64_OpEqualMaskedInt16x32(v) + case OpEqualMaskedInt16x8: + return rewriteValueAMD64_OpEqualMaskedInt16x8(v) + case OpEqualMaskedInt32x16: + return rewriteValueAMD64_OpEqualMaskedInt32x16(v) + case OpEqualMaskedInt32x4: + return rewriteValueAMD64_OpEqualMaskedInt32x4(v) + case OpEqualMaskedInt32x8: + return rewriteValueAMD64_OpEqualMaskedInt32x8(v) + case OpEqualMaskedInt64x2: + return rewriteValueAMD64_OpEqualMaskedInt64x2(v) + case OpEqualMaskedInt64x4: + return rewriteValueAMD64_OpEqualMaskedInt64x4(v) + case OpEqualMaskedInt64x8: + return rewriteValueAMD64_OpEqualMaskedInt64x8(v) + case OpEqualMaskedInt8x16: + return rewriteValueAMD64_OpEqualMaskedInt8x16(v) + case OpEqualMaskedInt8x32: + return rewriteValueAMD64_OpEqualMaskedInt8x32(v) + case OpEqualMaskedInt8x64: + return rewriteValueAMD64_OpEqualMaskedInt8x64(v) + case OpEqualMaskedUint16x16: + return rewriteValueAMD64_OpEqualMaskedUint16x16(v) + case OpEqualMaskedUint16x32: + return rewriteValueAMD64_OpEqualMaskedUint16x32(v) + case OpEqualMaskedUint16x8: + return rewriteValueAMD64_OpEqualMaskedUint16x8(v) + case OpEqualMaskedUint32x16: + return rewriteValueAMD64_OpEqualMaskedUint32x16(v) + case OpEqualMaskedUint32x4: + return rewriteValueAMD64_OpEqualMaskedUint32x4(v) + case OpEqualMaskedUint32x8: + return 
rewriteValueAMD64_OpEqualMaskedUint32x8(v) + case OpEqualMaskedUint64x2: + return rewriteValueAMD64_OpEqualMaskedUint64x2(v) + case OpEqualMaskedUint64x4: + return rewriteValueAMD64_OpEqualMaskedUint64x4(v) + case OpEqualMaskedUint64x8: + return rewriteValueAMD64_OpEqualMaskedUint64x8(v) + case OpEqualMaskedUint8x16: + return rewriteValueAMD64_OpEqualMaskedUint8x16(v) + case OpEqualMaskedUint8x32: + return rewriteValueAMD64_OpEqualMaskedUint8x32(v) + case OpEqualMaskedUint8x64: + return rewriteValueAMD64_OpEqualMaskedUint8x64(v) case OpEqualUint16x16: return rewriteValueAMD64_OpEqualUint16x16(v) case OpEqualUint16x32: @@ -1277,6 +1577,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v) case OpFloorWithPrecisionFloat64x8: return rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v) + case OpFloorWithPrecisionMaskedFloat32x16: + return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x16(v) + case OpFloorWithPrecisionMaskedFloat32x4: + return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x4(v) + case OpFloorWithPrecisionMaskedFloat32x8: + return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x8(v) + case OpFloorWithPrecisionMaskedFloat64x2: + return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x2(v) + case OpFloorWithPrecisionMaskedFloat64x4: + return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x4(v) + case OpFloorWithPrecisionMaskedFloat64x8: + return rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x8(v) case OpFusedMultiplyAddFloat32x16: v.Op = OpAMD64VFMADD213PS512 return true @@ -1295,6 +1607,18 @@ func rewriteValueAMD64(v *Value) bool { case OpFusedMultiplyAddFloat64x8: v.Op = OpAMD64VFMADD213PD512 return true + case OpFusedMultiplyAddMaskedFloat32x16: + return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x16(v) + case OpFusedMultiplyAddMaskedFloat32x4: + return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x4(v) + case OpFusedMultiplyAddMaskedFloat32x8: + return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x8(v) + case OpFusedMultiplyAddMaskedFloat64x2: + return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x2(v) + case OpFusedMultiplyAddMaskedFloat64x4: + return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x4(v) + case OpFusedMultiplyAddMaskedFloat64x8: + return rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x8(v) case OpFusedMultiplyAddSubFloat32x16: v.Op = OpAMD64VFMADDSUB213PS512 return true @@ -1313,6 +1637,18 @@ func rewriteValueAMD64(v *Value) bool { case OpFusedMultiplyAddSubFloat64x8: v.Op = OpAMD64VFMADDSUB213PD512 return true + case OpFusedMultiplyAddSubMaskedFloat32x16: + return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x16(v) + case OpFusedMultiplyAddSubMaskedFloat32x4: + return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x4(v) + case OpFusedMultiplyAddSubMaskedFloat32x8: + return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x8(v) + case OpFusedMultiplyAddSubMaskedFloat64x2: + return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x2(v) + case OpFusedMultiplyAddSubMaskedFloat64x4: + return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x4(v) + case OpFusedMultiplyAddSubMaskedFloat64x8: + return rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x8(v) case OpFusedMultiplySubAddFloat32x16: v.Op = OpAMD64VFMSUBADD213PS512 return true @@ -1331,18 +1667,48 @@ func rewriteValueAMD64(v *Value) bool { case OpFusedMultiplySubAddFloat64x8: v.Op = OpAMD64VFMSUBADD213PD512 return true + case OpFusedMultiplySubAddMaskedFloat32x16: + return 
rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x16(v) + case OpFusedMultiplySubAddMaskedFloat32x4: + return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x4(v) + case OpFusedMultiplySubAddMaskedFloat32x8: + return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x8(v) + case OpFusedMultiplySubAddMaskedFloat64x2: + return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x2(v) + case OpFusedMultiplySubAddMaskedFloat64x4: + return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x4(v) + case OpFusedMultiplySubAddMaskedFloat64x8: + return rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x8(v) + case OpGaloisFieldAffineTransformInversedMaskedUint8x16: + return rewriteValueAMD64_OpGaloisFieldAffineTransformInversedMaskedUint8x16(v) + case OpGaloisFieldAffineTransformInversedMaskedUint8x32: + return rewriteValueAMD64_OpGaloisFieldAffineTransformInversedMaskedUint8x32(v) + case OpGaloisFieldAffineTransformInversedMaskedUint8x64: + return rewriteValueAMD64_OpGaloisFieldAffineTransformInversedMaskedUint8x64(v) case OpGaloisFieldAffineTransformInversedUint8x16: return rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x16(v) case OpGaloisFieldAffineTransformInversedUint8x32: return rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x32(v) case OpGaloisFieldAffineTransformInversedUint8x64: return rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x64(v) + case OpGaloisFieldAffineTransformMaskedUint8x16: + return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v) + case OpGaloisFieldAffineTransformMaskedUint8x32: + return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v) + case OpGaloisFieldAffineTransformMaskedUint8x64: + return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v) case OpGaloisFieldAffineTransformUint8x16: return rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x16(v) case OpGaloisFieldAffineTransformUint8x32: return rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x32(v) case OpGaloisFieldAffineTransformUint8x64: return rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x64(v) + case OpGaloisFieldMulMaskedUint8x16: + return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v) + case OpGaloisFieldMulMaskedUint8x32: + return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v) + case OpGaloisFieldMulMaskedUint8x64: + return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v) case OpGaloisFieldMulUint8x16: v.Op = OpAMD64VGF2P8MULB128 return true @@ -1435,6 +1801,66 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterEqualInt8x32(v) case OpGreaterEqualInt8x64: return rewriteValueAMD64_OpGreaterEqualInt8x64(v) + case OpGreaterEqualMaskedFloat32x16: + return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v) + case OpGreaterEqualMaskedFloat32x4: + return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v) + case OpGreaterEqualMaskedFloat32x8: + return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v) + case OpGreaterEqualMaskedFloat64x2: + return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v) + case OpGreaterEqualMaskedFloat64x4: + return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v) + case OpGreaterEqualMaskedFloat64x8: + return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v) + case OpGreaterEqualMaskedInt16x16: + return rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v) + case OpGreaterEqualMaskedInt16x32: + return rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v) + case OpGreaterEqualMaskedInt16x8: + return rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v) + case 
OpGreaterEqualMaskedInt32x16: + return rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v) + case OpGreaterEqualMaskedInt32x4: + return rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v) + case OpGreaterEqualMaskedInt32x8: + return rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v) + case OpGreaterEqualMaskedInt64x2: + return rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v) + case OpGreaterEqualMaskedInt64x4: + return rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v) + case OpGreaterEqualMaskedInt64x8: + return rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v) + case OpGreaterEqualMaskedInt8x16: + return rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v) + case OpGreaterEqualMaskedInt8x32: + return rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v) + case OpGreaterEqualMaskedInt8x64: + return rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v) + case OpGreaterEqualMaskedUint16x16: + return rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v) + case OpGreaterEqualMaskedUint16x32: + return rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v) + case OpGreaterEqualMaskedUint16x8: + return rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v) + case OpGreaterEqualMaskedUint32x16: + return rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v) + case OpGreaterEqualMaskedUint32x4: + return rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v) + case OpGreaterEqualMaskedUint32x8: + return rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v) + case OpGreaterEqualMaskedUint64x2: + return rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v) + case OpGreaterEqualMaskedUint64x4: + return rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v) + case OpGreaterEqualMaskedUint64x8: + return rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v) + case OpGreaterEqualMaskedUint8x16: + return rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v) + case OpGreaterEqualMaskedUint8x32: + return rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v) + case OpGreaterEqualMaskedUint8x64: + return rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v) case OpGreaterEqualUint16x16: return rewriteValueAMD64_OpGreaterEqualUint16x16(v) case OpGreaterEqualUint16x32: @@ -1502,6 +1928,66 @@ func rewriteValueAMD64(v *Value) bool { return true case OpGreaterInt8x64: return rewriteValueAMD64_OpGreaterInt8x64(v) + case OpGreaterMaskedFloat32x16: + return rewriteValueAMD64_OpGreaterMaskedFloat32x16(v) + case OpGreaterMaskedFloat32x4: + return rewriteValueAMD64_OpGreaterMaskedFloat32x4(v) + case OpGreaterMaskedFloat32x8: + return rewriteValueAMD64_OpGreaterMaskedFloat32x8(v) + case OpGreaterMaskedFloat64x2: + return rewriteValueAMD64_OpGreaterMaskedFloat64x2(v) + case OpGreaterMaskedFloat64x4: + return rewriteValueAMD64_OpGreaterMaskedFloat64x4(v) + case OpGreaterMaskedFloat64x8: + return rewriteValueAMD64_OpGreaterMaskedFloat64x8(v) + case OpGreaterMaskedInt16x16: + return rewriteValueAMD64_OpGreaterMaskedInt16x16(v) + case OpGreaterMaskedInt16x32: + return rewriteValueAMD64_OpGreaterMaskedInt16x32(v) + case OpGreaterMaskedInt16x8: + return rewriteValueAMD64_OpGreaterMaskedInt16x8(v) + case OpGreaterMaskedInt32x16: + return rewriteValueAMD64_OpGreaterMaskedInt32x16(v) + case OpGreaterMaskedInt32x4: + return rewriteValueAMD64_OpGreaterMaskedInt32x4(v) + case OpGreaterMaskedInt32x8: + return rewriteValueAMD64_OpGreaterMaskedInt32x8(v) + case OpGreaterMaskedInt64x2: + return rewriteValueAMD64_OpGreaterMaskedInt64x2(v) + case OpGreaterMaskedInt64x4: + return rewriteValueAMD64_OpGreaterMaskedInt64x4(v) + case OpGreaterMaskedInt64x8: + return rewriteValueAMD64_OpGreaterMaskedInt64x8(v) + case 
OpGreaterMaskedInt8x16: + return rewriteValueAMD64_OpGreaterMaskedInt8x16(v) + case OpGreaterMaskedInt8x32: + return rewriteValueAMD64_OpGreaterMaskedInt8x32(v) + case OpGreaterMaskedInt8x64: + return rewriteValueAMD64_OpGreaterMaskedInt8x64(v) + case OpGreaterMaskedUint16x16: + return rewriteValueAMD64_OpGreaterMaskedUint16x16(v) + case OpGreaterMaskedUint16x32: + return rewriteValueAMD64_OpGreaterMaskedUint16x32(v) + case OpGreaterMaskedUint16x8: + return rewriteValueAMD64_OpGreaterMaskedUint16x8(v) + case OpGreaterMaskedUint32x16: + return rewriteValueAMD64_OpGreaterMaskedUint32x16(v) + case OpGreaterMaskedUint32x4: + return rewriteValueAMD64_OpGreaterMaskedUint32x4(v) + case OpGreaterMaskedUint32x8: + return rewriteValueAMD64_OpGreaterMaskedUint32x8(v) + case OpGreaterMaskedUint64x2: + return rewriteValueAMD64_OpGreaterMaskedUint64x2(v) + case OpGreaterMaskedUint64x4: + return rewriteValueAMD64_OpGreaterMaskedUint64x4(v) + case OpGreaterMaskedUint64x8: + return rewriteValueAMD64_OpGreaterMaskedUint64x8(v) + case OpGreaterMaskedUint8x16: + return rewriteValueAMD64_OpGreaterMaskedUint8x16(v) + case OpGreaterMaskedUint8x32: + return rewriteValueAMD64_OpGreaterMaskedUint8x32(v) + case OpGreaterMaskedUint8x64: + return rewriteValueAMD64_OpGreaterMaskedUint8x64(v) case OpGreaterUint16x16: return rewriteValueAMD64_OpGreaterUint16x16(v) case OpGreaterUint16x32: @@ -1557,6 +2043,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpIsNanFloat64x4(v) case OpIsNanFloat64x8: return rewriteValueAMD64_OpIsNanFloat64x8(v) + case OpIsNanMaskedFloat32x16: + return rewriteValueAMD64_OpIsNanMaskedFloat32x16(v) + case OpIsNanMaskedFloat32x4: + return rewriteValueAMD64_OpIsNanMaskedFloat32x4(v) + case OpIsNanMaskedFloat32x8: + return rewriteValueAMD64_OpIsNanMaskedFloat32x8(v) + case OpIsNanMaskedFloat64x2: + return rewriteValueAMD64_OpIsNanMaskedFloat64x2(v) + case OpIsNanMaskedFloat64x4: + return rewriteValueAMD64_OpIsNanMaskedFloat64x4(v) + case OpIsNanMaskedFloat64x8: + return rewriteValueAMD64_OpIsNanMaskedFloat64x8(v) case OpIsNonNil: return rewriteValueAMD64_OpIsNonNil(v) case OpIsSliceInBounds: @@ -1637,6 +2135,66 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessEqualInt8x32(v) case OpLessEqualInt8x64: return rewriteValueAMD64_OpLessEqualInt8x64(v) + case OpLessEqualMaskedFloat32x16: + return rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v) + case OpLessEqualMaskedFloat32x4: + return rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v) + case OpLessEqualMaskedFloat32x8: + return rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v) + case OpLessEqualMaskedFloat64x2: + return rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v) + case OpLessEqualMaskedFloat64x4: + return rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v) + case OpLessEqualMaskedFloat64x8: + return rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v) + case OpLessEqualMaskedInt16x16: + return rewriteValueAMD64_OpLessEqualMaskedInt16x16(v) + case OpLessEqualMaskedInt16x32: + return rewriteValueAMD64_OpLessEqualMaskedInt16x32(v) + case OpLessEqualMaskedInt16x8: + return rewriteValueAMD64_OpLessEqualMaskedInt16x8(v) + case OpLessEqualMaskedInt32x16: + return rewriteValueAMD64_OpLessEqualMaskedInt32x16(v) + case OpLessEqualMaskedInt32x4: + return rewriteValueAMD64_OpLessEqualMaskedInt32x4(v) + case OpLessEqualMaskedInt32x8: + return rewriteValueAMD64_OpLessEqualMaskedInt32x8(v) + case OpLessEqualMaskedInt64x2: + return rewriteValueAMD64_OpLessEqualMaskedInt64x2(v) + case OpLessEqualMaskedInt64x4: + return 
rewriteValueAMD64_OpLessEqualMaskedInt64x4(v) + case OpLessEqualMaskedInt64x8: + return rewriteValueAMD64_OpLessEqualMaskedInt64x8(v) + case OpLessEqualMaskedInt8x16: + return rewriteValueAMD64_OpLessEqualMaskedInt8x16(v) + case OpLessEqualMaskedInt8x32: + return rewriteValueAMD64_OpLessEqualMaskedInt8x32(v) + case OpLessEqualMaskedInt8x64: + return rewriteValueAMD64_OpLessEqualMaskedInt8x64(v) + case OpLessEqualMaskedUint16x16: + return rewriteValueAMD64_OpLessEqualMaskedUint16x16(v) + case OpLessEqualMaskedUint16x32: + return rewriteValueAMD64_OpLessEqualMaskedUint16x32(v) + case OpLessEqualMaskedUint16x8: + return rewriteValueAMD64_OpLessEqualMaskedUint16x8(v) + case OpLessEqualMaskedUint32x16: + return rewriteValueAMD64_OpLessEqualMaskedUint32x16(v) + case OpLessEqualMaskedUint32x4: + return rewriteValueAMD64_OpLessEqualMaskedUint32x4(v) + case OpLessEqualMaskedUint32x8: + return rewriteValueAMD64_OpLessEqualMaskedUint32x8(v) + case OpLessEqualMaskedUint64x2: + return rewriteValueAMD64_OpLessEqualMaskedUint64x2(v) + case OpLessEqualMaskedUint64x4: + return rewriteValueAMD64_OpLessEqualMaskedUint64x4(v) + case OpLessEqualMaskedUint64x8: + return rewriteValueAMD64_OpLessEqualMaskedUint64x8(v) + case OpLessEqualMaskedUint8x16: + return rewriteValueAMD64_OpLessEqualMaskedUint8x16(v) + case OpLessEqualMaskedUint8x32: + return rewriteValueAMD64_OpLessEqualMaskedUint8x32(v) + case OpLessEqualMaskedUint8x64: + return rewriteValueAMD64_OpLessEqualMaskedUint8x64(v) case OpLessEqualUint16x16: return rewriteValueAMD64_OpLessEqualUint16x16(v) case OpLessEqualUint16x32: @@ -1697,6 +2255,66 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessInt8x32(v) case OpLessInt8x64: return rewriteValueAMD64_OpLessInt8x64(v) + case OpLessMaskedFloat32x16: + return rewriteValueAMD64_OpLessMaskedFloat32x16(v) + case OpLessMaskedFloat32x4: + return rewriteValueAMD64_OpLessMaskedFloat32x4(v) + case OpLessMaskedFloat32x8: + return rewriteValueAMD64_OpLessMaskedFloat32x8(v) + case OpLessMaskedFloat64x2: + return rewriteValueAMD64_OpLessMaskedFloat64x2(v) + case OpLessMaskedFloat64x4: + return rewriteValueAMD64_OpLessMaskedFloat64x4(v) + case OpLessMaskedFloat64x8: + return rewriteValueAMD64_OpLessMaskedFloat64x8(v) + case OpLessMaskedInt16x16: + return rewriteValueAMD64_OpLessMaskedInt16x16(v) + case OpLessMaskedInt16x32: + return rewriteValueAMD64_OpLessMaskedInt16x32(v) + case OpLessMaskedInt16x8: + return rewriteValueAMD64_OpLessMaskedInt16x8(v) + case OpLessMaskedInt32x16: + return rewriteValueAMD64_OpLessMaskedInt32x16(v) + case OpLessMaskedInt32x4: + return rewriteValueAMD64_OpLessMaskedInt32x4(v) + case OpLessMaskedInt32x8: + return rewriteValueAMD64_OpLessMaskedInt32x8(v) + case OpLessMaskedInt64x2: + return rewriteValueAMD64_OpLessMaskedInt64x2(v) + case OpLessMaskedInt64x4: + return rewriteValueAMD64_OpLessMaskedInt64x4(v) + case OpLessMaskedInt64x8: + return rewriteValueAMD64_OpLessMaskedInt64x8(v) + case OpLessMaskedInt8x16: + return rewriteValueAMD64_OpLessMaskedInt8x16(v) + case OpLessMaskedInt8x32: + return rewriteValueAMD64_OpLessMaskedInt8x32(v) + case OpLessMaskedInt8x64: + return rewriteValueAMD64_OpLessMaskedInt8x64(v) + case OpLessMaskedUint16x16: + return rewriteValueAMD64_OpLessMaskedUint16x16(v) + case OpLessMaskedUint16x32: + return rewriteValueAMD64_OpLessMaskedUint16x32(v) + case OpLessMaskedUint16x8: + return rewriteValueAMD64_OpLessMaskedUint16x8(v) + case OpLessMaskedUint32x16: + return rewriteValueAMD64_OpLessMaskedUint32x16(v) + case OpLessMaskedUint32x4: + return 
rewriteValueAMD64_OpLessMaskedUint32x4(v) + case OpLessMaskedUint32x8: + return rewriteValueAMD64_OpLessMaskedUint32x8(v) + case OpLessMaskedUint64x2: + return rewriteValueAMD64_OpLessMaskedUint64x2(v) + case OpLessMaskedUint64x4: + return rewriteValueAMD64_OpLessMaskedUint64x4(v) + case OpLessMaskedUint64x8: + return rewriteValueAMD64_OpLessMaskedUint64x8(v) + case OpLessMaskedUint8x16: + return rewriteValueAMD64_OpLessMaskedUint8x16(v) + case OpLessMaskedUint8x32: + return rewriteValueAMD64_OpLessMaskedUint8x32(v) + case OpLessMaskedUint8x64: + return rewriteValueAMD64_OpLessMaskedUint8x64(v) case OpLessUint16x16: return rewriteValueAMD64_OpLessUint16x16(v) case OpLessUint16x32: @@ -1757,1536 +2375,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLsh8x64(v) case OpLsh8x8: return rewriteValueAMD64_OpLsh8x8(v) - case OpMaskedAbsoluteInt16x16: - return rewriteValueAMD64_OpMaskedAbsoluteInt16x16(v) - case OpMaskedAbsoluteInt16x32: - return rewriteValueAMD64_OpMaskedAbsoluteInt16x32(v) - case OpMaskedAbsoluteInt16x8: - return rewriteValueAMD64_OpMaskedAbsoluteInt16x8(v) - case OpMaskedAbsoluteInt32x16: - return rewriteValueAMD64_OpMaskedAbsoluteInt32x16(v) - case OpMaskedAbsoluteInt32x4: - return rewriteValueAMD64_OpMaskedAbsoluteInt32x4(v) - case OpMaskedAbsoluteInt32x8: - return rewriteValueAMD64_OpMaskedAbsoluteInt32x8(v) - case OpMaskedAbsoluteInt64x2: - return rewriteValueAMD64_OpMaskedAbsoluteInt64x2(v) - case OpMaskedAbsoluteInt64x4: - return rewriteValueAMD64_OpMaskedAbsoluteInt64x4(v) - case OpMaskedAbsoluteInt64x8: - return rewriteValueAMD64_OpMaskedAbsoluteInt64x8(v) - case OpMaskedAbsoluteInt8x16: - return rewriteValueAMD64_OpMaskedAbsoluteInt8x16(v) - case OpMaskedAbsoluteInt8x32: - return rewriteValueAMD64_OpMaskedAbsoluteInt8x32(v) - case OpMaskedAbsoluteInt8x64: - return rewriteValueAMD64_OpMaskedAbsoluteInt8x64(v) - case OpMaskedAddFloat32x16: - return rewriteValueAMD64_OpMaskedAddFloat32x16(v) - case OpMaskedAddFloat32x4: - return rewriteValueAMD64_OpMaskedAddFloat32x4(v) - case OpMaskedAddFloat32x8: - return rewriteValueAMD64_OpMaskedAddFloat32x8(v) - case OpMaskedAddFloat64x2: - return rewriteValueAMD64_OpMaskedAddFloat64x2(v) - case OpMaskedAddFloat64x4: - return rewriteValueAMD64_OpMaskedAddFloat64x4(v) - case OpMaskedAddFloat64x8: - return rewriteValueAMD64_OpMaskedAddFloat64x8(v) - case OpMaskedAddInt16x16: - return rewriteValueAMD64_OpMaskedAddInt16x16(v) - case OpMaskedAddInt16x32: - return rewriteValueAMD64_OpMaskedAddInt16x32(v) - case OpMaskedAddInt16x8: - return rewriteValueAMD64_OpMaskedAddInt16x8(v) - case OpMaskedAddInt32x16: - return rewriteValueAMD64_OpMaskedAddInt32x16(v) - case OpMaskedAddInt32x4: - return rewriteValueAMD64_OpMaskedAddInt32x4(v) - case OpMaskedAddInt32x8: - return rewriteValueAMD64_OpMaskedAddInt32x8(v) - case OpMaskedAddInt64x2: - return rewriteValueAMD64_OpMaskedAddInt64x2(v) - case OpMaskedAddInt64x4: - return rewriteValueAMD64_OpMaskedAddInt64x4(v) - case OpMaskedAddInt64x8: - return rewriteValueAMD64_OpMaskedAddInt64x8(v) - case OpMaskedAddInt8x16: - return rewriteValueAMD64_OpMaskedAddInt8x16(v) - case OpMaskedAddInt8x32: - return rewriteValueAMD64_OpMaskedAddInt8x32(v) - case OpMaskedAddInt8x64: - return rewriteValueAMD64_OpMaskedAddInt8x64(v) - case OpMaskedAddUint16x16: - return rewriteValueAMD64_OpMaskedAddUint16x16(v) - case OpMaskedAddUint16x32: - return rewriteValueAMD64_OpMaskedAddUint16x32(v) - case OpMaskedAddUint16x8: - return rewriteValueAMD64_OpMaskedAddUint16x8(v) - case OpMaskedAddUint32x16: - 
return rewriteValueAMD64_OpMaskedAddUint32x16(v) - case OpMaskedAddUint32x4: - return rewriteValueAMD64_OpMaskedAddUint32x4(v) - case OpMaskedAddUint32x8: - return rewriteValueAMD64_OpMaskedAddUint32x8(v) - case OpMaskedAddUint64x2: - return rewriteValueAMD64_OpMaskedAddUint64x2(v) - case OpMaskedAddUint64x4: - return rewriteValueAMD64_OpMaskedAddUint64x4(v) - case OpMaskedAddUint64x8: - return rewriteValueAMD64_OpMaskedAddUint64x8(v) - case OpMaskedAddUint8x16: - return rewriteValueAMD64_OpMaskedAddUint8x16(v) - case OpMaskedAddUint8x32: - return rewriteValueAMD64_OpMaskedAddUint8x32(v) - case OpMaskedAddUint8x64: - return rewriteValueAMD64_OpMaskedAddUint8x64(v) - case OpMaskedAndInt32x16: - return rewriteValueAMD64_OpMaskedAndInt32x16(v) - case OpMaskedAndInt32x4: - return rewriteValueAMD64_OpMaskedAndInt32x4(v) - case OpMaskedAndInt32x8: - return rewriteValueAMD64_OpMaskedAndInt32x8(v) - case OpMaskedAndInt64x2: - return rewriteValueAMD64_OpMaskedAndInt64x2(v) - case OpMaskedAndInt64x4: - return rewriteValueAMD64_OpMaskedAndInt64x4(v) - case OpMaskedAndInt64x8: - return rewriteValueAMD64_OpMaskedAndInt64x8(v) - case OpMaskedAndNotInt32x16: - return rewriteValueAMD64_OpMaskedAndNotInt32x16(v) - case OpMaskedAndNotInt32x4: - return rewriteValueAMD64_OpMaskedAndNotInt32x4(v) - case OpMaskedAndNotInt32x8: - return rewriteValueAMD64_OpMaskedAndNotInt32x8(v) - case OpMaskedAndNotInt64x2: - return rewriteValueAMD64_OpMaskedAndNotInt64x2(v) - case OpMaskedAndNotInt64x4: - return rewriteValueAMD64_OpMaskedAndNotInt64x4(v) - case OpMaskedAndNotInt64x8: - return rewriteValueAMD64_OpMaskedAndNotInt64x8(v) - case OpMaskedAndNotUint32x16: - return rewriteValueAMD64_OpMaskedAndNotUint32x16(v) - case OpMaskedAndNotUint32x4: - return rewriteValueAMD64_OpMaskedAndNotUint32x4(v) - case OpMaskedAndNotUint32x8: - return rewriteValueAMD64_OpMaskedAndNotUint32x8(v) - case OpMaskedAndNotUint64x2: - return rewriteValueAMD64_OpMaskedAndNotUint64x2(v) - case OpMaskedAndNotUint64x4: - return rewriteValueAMD64_OpMaskedAndNotUint64x4(v) - case OpMaskedAndNotUint64x8: - return rewriteValueAMD64_OpMaskedAndNotUint64x8(v) - case OpMaskedAndUint32x16: - return rewriteValueAMD64_OpMaskedAndUint32x16(v) - case OpMaskedAndUint32x4: - return rewriteValueAMD64_OpMaskedAndUint32x4(v) - case OpMaskedAndUint32x8: - return rewriteValueAMD64_OpMaskedAndUint32x8(v) - case OpMaskedAndUint64x2: - return rewriteValueAMD64_OpMaskedAndUint64x2(v) - case OpMaskedAndUint64x4: - return rewriteValueAMD64_OpMaskedAndUint64x4(v) - case OpMaskedAndUint64x8: - return rewriteValueAMD64_OpMaskedAndUint64x8(v) - case OpMaskedApproximateReciprocalFloat32x16: - return rewriteValueAMD64_OpMaskedApproximateReciprocalFloat32x16(v) - case OpMaskedApproximateReciprocalFloat32x4: - return rewriteValueAMD64_OpMaskedApproximateReciprocalFloat32x4(v) - case OpMaskedApproximateReciprocalFloat32x8: - return rewriteValueAMD64_OpMaskedApproximateReciprocalFloat32x8(v) - case OpMaskedApproximateReciprocalFloat64x2: - return rewriteValueAMD64_OpMaskedApproximateReciprocalFloat64x2(v) - case OpMaskedApproximateReciprocalFloat64x4: - return rewriteValueAMD64_OpMaskedApproximateReciprocalFloat64x4(v) - case OpMaskedApproximateReciprocalFloat64x8: - return rewriteValueAMD64_OpMaskedApproximateReciprocalFloat64x8(v) - case OpMaskedApproximateReciprocalOfSqrtFloat32x16: - return rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat32x16(v) - case OpMaskedApproximateReciprocalOfSqrtFloat32x4: - return 
rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat32x4(v) - case OpMaskedApproximateReciprocalOfSqrtFloat32x8: - return rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat32x8(v) - case OpMaskedApproximateReciprocalOfSqrtFloat64x2: - return rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat64x2(v) - case OpMaskedApproximateReciprocalOfSqrtFloat64x4: - return rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat64x4(v) - case OpMaskedApproximateReciprocalOfSqrtFloat64x8: - return rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat64x8(v) - case OpMaskedAverageUint16x16: - return rewriteValueAMD64_OpMaskedAverageUint16x16(v) - case OpMaskedAverageUint16x32: - return rewriteValueAMD64_OpMaskedAverageUint16x32(v) - case OpMaskedAverageUint16x8: - return rewriteValueAMD64_OpMaskedAverageUint16x8(v) - case OpMaskedAverageUint8x16: - return rewriteValueAMD64_OpMaskedAverageUint8x16(v) - case OpMaskedAverageUint8x32: - return rewriteValueAMD64_OpMaskedAverageUint8x32(v) - case OpMaskedAverageUint8x64: - return rewriteValueAMD64_OpMaskedAverageUint8x64(v) - case OpMaskedCeilWithPrecisionFloat32x16: - return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x16(v) - case OpMaskedCeilWithPrecisionFloat32x4: - return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x4(v) - case OpMaskedCeilWithPrecisionFloat32x8: - return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x8(v) - case OpMaskedCeilWithPrecisionFloat64x2: - return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x2(v) - case OpMaskedCeilWithPrecisionFloat64x4: - return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x4(v) - case OpMaskedCeilWithPrecisionFloat64x8: - return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x8(v) - case OpMaskedDiffWithCeilWithPrecisionFloat32x16: - return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x16(v) - case OpMaskedDiffWithCeilWithPrecisionFloat32x4: - return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x4(v) - case OpMaskedDiffWithCeilWithPrecisionFloat32x8: - return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x8(v) - case OpMaskedDiffWithCeilWithPrecisionFloat64x2: - return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x2(v) - case OpMaskedDiffWithCeilWithPrecisionFloat64x4: - return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x4(v) - case OpMaskedDiffWithCeilWithPrecisionFloat64x8: - return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x8(v) - case OpMaskedDiffWithFloorWithPrecisionFloat32x16: - return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x16(v) - case OpMaskedDiffWithFloorWithPrecisionFloat32x4: - return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x4(v) - case OpMaskedDiffWithFloorWithPrecisionFloat32x8: - return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x8(v) - case OpMaskedDiffWithFloorWithPrecisionFloat64x2: - return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x2(v) - case OpMaskedDiffWithFloorWithPrecisionFloat64x4: - return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x4(v) - case OpMaskedDiffWithFloorWithPrecisionFloat64x8: - return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x8(v) - case OpMaskedDiffWithRoundWithPrecisionFloat32x16: - return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x16(v) - case OpMaskedDiffWithRoundWithPrecisionFloat32x4: - return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x4(v) - case 
OpMaskedDiffWithRoundWithPrecisionFloat32x8: - return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x8(v) - case OpMaskedDiffWithRoundWithPrecisionFloat64x2: - return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x2(v) - case OpMaskedDiffWithRoundWithPrecisionFloat64x4: - return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x4(v) - case OpMaskedDiffWithRoundWithPrecisionFloat64x8: - return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x8(v) - case OpMaskedDiffWithTruncWithPrecisionFloat32x16: - return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x16(v) - case OpMaskedDiffWithTruncWithPrecisionFloat32x4: - return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x4(v) - case OpMaskedDiffWithTruncWithPrecisionFloat32x8: - return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x8(v) - case OpMaskedDiffWithTruncWithPrecisionFloat64x2: - return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x2(v) - case OpMaskedDiffWithTruncWithPrecisionFloat64x4: - return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x4(v) - case OpMaskedDiffWithTruncWithPrecisionFloat64x8: - return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x8(v) - case OpMaskedDivFloat32x16: - return rewriteValueAMD64_OpMaskedDivFloat32x16(v) - case OpMaskedDivFloat32x4: - return rewriteValueAMD64_OpMaskedDivFloat32x4(v) - case OpMaskedDivFloat32x8: - return rewriteValueAMD64_OpMaskedDivFloat32x8(v) - case OpMaskedDivFloat64x2: - return rewriteValueAMD64_OpMaskedDivFloat64x2(v) - case OpMaskedDivFloat64x4: - return rewriteValueAMD64_OpMaskedDivFloat64x4(v) - case OpMaskedDivFloat64x8: - return rewriteValueAMD64_OpMaskedDivFloat64x8(v) - case OpMaskedEqualFloat32x16: - return rewriteValueAMD64_OpMaskedEqualFloat32x16(v) - case OpMaskedEqualFloat32x4: - return rewriteValueAMD64_OpMaskedEqualFloat32x4(v) - case OpMaskedEqualFloat32x8: - return rewriteValueAMD64_OpMaskedEqualFloat32x8(v) - case OpMaskedEqualFloat64x2: - return rewriteValueAMD64_OpMaskedEqualFloat64x2(v) - case OpMaskedEqualFloat64x4: - return rewriteValueAMD64_OpMaskedEqualFloat64x4(v) - case OpMaskedEqualFloat64x8: - return rewriteValueAMD64_OpMaskedEqualFloat64x8(v) - case OpMaskedEqualInt16x16: - return rewriteValueAMD64_OpMaskedEqualInt16x16(v) - case OpMaskedEqualInt16x32: - return rewriteValueAMD64_OpMaskedEqualInt16x32(v) - case OpMaskedEqualInt16x8: - return rewriteValueAMD64_OpMaskedEqualInt16x8(v) - case OpMaskedEqualInt32x16: - return rewriteValueAMD64_OpMaskedEqualInt32x16(v) - case OpMaskedEqualInt32x4: - return rewriteValueAMD64_OpMaskedEqualInt32x4(v) - case OpMaskedEqualInt32x8: - return rewriteValueAMD64_OpMaskedEqualInt32x8(v) - case OpMaskedEqualInt64x2: - return rewriteValueAMD64_OpMaskedEqualInt64x2(v) - case OpMaskedEqualInt64x4: - return rewriteValueAMD64_OpMaskedEqualInt64x4(v) - case OpMaskedEqualInt64x8: - return rewriteValueAMD64_OpMaskedEqualInt64x8(v) - case OpMaskedEqualInt8x16: - return rewriteValueAMD64_OpMaskedEqualInt8x16(v) - case OpMaskedEqualInt8x32: - return rewriteValueAMD64_OpMaskedEqualInt8x32(v) - case OpMaskedEqualInt8x64: - return rewriteValueAMD64_OpMaskedEqualInt8x64(v) - case OpMaskedEqualUint16x16: - return rewriteValueAMD64_OpMaskedEqualUint16x16(v) - case OpMaskedEqualUint16x32: - return rewriteValueAMD64_OpMaskedEqualUint16x32(v) - case OpMaskedEqualUint16x8: - return rewriteValueAMD64_OpMaskedEqualUint16x8(v) - case OpMaskedEqualUint32x16: - return rewriteValueAMD64_OpMaskedEqualUint32x16(v) - case 
OpMaskedEqualUint32x4: - return rewriteValueAMD64_OpMaskedEqualUint32x4(v) - case OpMaskedEqualUint32x8: - return rewriteValueAMD64_OpMaskedEqualUint32x8(v) - case OpMaskedEqualUint64x2: - return rewriteValueAMD64_OpMaskedEqualUint64x2(v) - case OpMaskedEqualUint64x4: - return rewriteValueAMD64_OpMaskedEqualUint64x4(v) - case OpMaskedEqualUint64x8: - return rewriteValueAMD64_OpMaskedEqualUint64x8(v) - case OpMaskedEqualUint8x16: - return rewriteValueAMD64_OpMaskedEqualUint8x16(v) - case OpMaskedEqualUint8x32: - return rewriteValueAMD64_OpMaskedEqualUint8x32(v) - case OpMaskedEqualUint8x64: - return rewriteValueAMD64_OpMaskedEqualUint8x64(v) - case OpMaskedFloorWithPrecisionFloat32x16: - return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x16(v) - case OpMaskedFloorWithPrecisionFloat32x4: - return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x4(v) - case OpMaskedFloorWithPrecisionFloat32x8: - return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x8(v) - case OpMaskedFloorWithPrecisionFloat64x2: - return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x2(v) - case OpMaskedFloorWithPrecisionFloat64x4: - return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x4(v) - case OpMaskedFloorWithPrecisionFloat64x8: - return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x8(v) - case OpMaskedFusedMultiplyAddFloat32x16: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat32x16(v) - case OpMaskedFusedMultiplyAddFloat32x4: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat32x4(v) - case OpMaskedFusedMultiplyAddFloat32x8: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat32x8(v) - case OpMaskedFusedMultiplyAddFloat64x2: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat64x2(v) - case OpMaskedFusedMultiplyAddFloat64x4: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat64x4(v) - case OpMaskedFusedMultiplyAddFloat64x8: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat64x8(v) - case OpMaskedFusedMultiplyAddSubFloat32x16: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat32x16(v) - case OpMaskedFusedMultiplyAddSubFloat32x4: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat32x4(v) - case OpMaskedFusedMultiplyAddSubFloat32x8: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat32x8(v) - case OpMaskedFusedMultiplyAddSubFloat64x2: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat64x2(v) - case OpMaskedFusedMultiplyAddSubFloat64x4: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat64x4(v) - case OpMaskedFusedMultiplyAddSubFloat64x8: - return rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat64x8(v) - case OpMaskedFusedMultiplySubAddFloat32x16: - return rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat32x16(v) - case OpMaskedFusedMultiplySubAddFloat32x4: - return rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat32x4(v) - case OpMaskedFusedMultiplySubAddFloat32x8: - return rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat32x8(v) - case OpMaskedFusedMultiplySubAddFloat64x2: - return rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat64x2(v) - case OpMaskedFusedMultiplySubAddFloat64x4: - return rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat64x4(v) - case OpMaskedFusedMultiplySubAddFloat64x8: - return rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat64x8(v) - case OpMaskedGaloisFieldAffineTransformInversedUint8x16: - return rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformInversedUint8x16(v) - case OpMaskedGaloisFieldAffineTransformInversedUint8x32: - return 
rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformInversedUint8x32(v) - case OpMaskedGaloisFieldAffineTransformInversedUint8x64: - return rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformInversedUint8x64(v) - case OpMaskedGaloisFieldAffineTransformUint8x16: - return rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformUint8x16(v) - case OpMaskedGaloisFieldAffineTransformUint8x32: - return rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformUint8x32(v) - case OpMaskedGaloisFieldAffineTransformUint8x64: - return rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformUint8x64(v) - case OpMaskedGaloisFieldMulUint8x16: - return rewriteValueAMD64_OpMaskedGaloisFieldMulUint8x16(v) - case OpMaskedGaloisFieldMulUint8x32: - return rewriteValueAMD64_OpMaskedGaloisFieldMulUint8x32(v) - case OpMaskedGaloisFieldMulUint8x64: - return rewriteValueAMD64_OpMaskedGaloisFieldMulUint8x64(v) - case OpMaskedGreaterEqualFloat32x16: - return rewriteValueAMD64_OpMaskedGreaterEqualFloat32x16(v) - case OpMaskedGreaterEqualFloat32x4: - return rewriteValueAMD64_OpMaskedGreaterEqualFloat32x4(v) - case OpMaskedGreaterEqualFloat32x8: - return rewriteValueAMD64_OpMaskedGreaterEqualFloat32x8(v) - case OpMaskedGreaterEqualFloat64x2: - return rewriteValueAMD64_OpMaskedGreaterEqualFloat64x2(v) - case OpMaskedGreaterEqualFloat64x4: - return rewriteValueAMD64_OpMaskedGreaterEqualFloat64x4(v) - case OpMaskedGreaterEqualFloat64x8: - return rewriteValueAMD64_OpMaskedGreaterEqualFloat64x8(v) - case OpMaskedGreaterEqualInt16x16: - return rewriteValueAMD64_OpMaskedGreaterEqualInt16x16(v) - case OpMaskedGreaterEqualInt16x32: - return rewriteValueAMD64_OpMaskedGreaterEqualInt16x32(v) - case OpMaskedGreaterEqualInt16x8: - return rewriteValueAMD64_OpMaskedGreaterEqualInt16x8(v) - case OpMaskedGreaterEqualInt32x16: - return rewriteValueAMD64_OpMaskedGreaterEqualInt32x16(v) - case OpMaskedGreaterEqualInt32x4: - return rewriteValueAMD64_OpMaskedGreaterEqualInt32x4(v) - case OpMaskedGreaterEqualInt32x8: - return rewriteValueAMD64_OpMaskedGreaterEqualInt32x8(v) - case OpMaskedGreaterEqualInt64x2: - return rewriteValueAMD64_OpMaskedGreaterEqualInt64x2(v) - case OpMaskedGreaterEqualInt64x4: - return rewriteValueAMD64_OpMaskedGreaterEqualInt64x4(v) - case OpMaskedGreaterEqualInt64x8: - return rewriteValueAMD64_OpMaskedGreaterEqualInt64x8(v) - case OpMaskedGreaterEqualInt8x16: - return rewriteValueAMD64_OpMaskedGreaterEqualInt8x16(v) - case OpMaskedGreaterEqualInt8x32: - return rewriteValueAMD64_OpMaskedGreaterEqualInt8x32(v) - case OpMaskedGreaterEqualInt8x64: - return rewriteValueAMD64_OpMaskedGreaterEqualInt8x64(v) - case OpMaskedGreaterEqualUint16x16: - return rewriteValueAMD64_OpMaskedGreaterEqualUint16x16(v) - case OpMaskedGreaterEqualUint16x32: - return rewriteValueAMD64_OpMaskedGreaterEqualUint16x32(v) - case OpMaskedGreaterEqualUint16x8: - return rewriteValueAMD64_OpMaskedGreaterEqualUint16x8(v) - case OpMaskedGreaterEqualUint32x16: - return rewriteValueAMD64_OpMaskedGreaterEqualUint32x16(v) - case OpMaskedGreaterEqualUint32x4: - return rewriteValueAMD64_OpMaskedGreaterEqualUint32x4(v) - case OpMaskedGreaterEqualUint32x8: - return rewriteValueAMD64_OpMaskedGreaterEqualUint32x8(v) - case OpMaskedGreaterEqualUint64x2: - return rewriteValueAMD64_OpMaskedGreaterEqualUint64x2(v) - case OpMaskedGreaterEqualUint64x4: - return rewriteValueAMD64_OpMaskedGreaterEqualUint64x4(v) - case OpMaskedGreaterEqualUint64x8: - return rewriteValueAMD64_OpMaskedGreaterEqualUint64x8(v) - case OpMaskedGreaterEqualUint8x16: - return 
rewriteValueAMD64_OpMaskedGreaterEqualUint8x16(v) - case OpMaskedGreaterEqualUint8x32: - return rewriteValueAMD64_OpMaskedGreaterEqualUint8x32(v) - case OpMaskedGreaterEqualUint8x64: - return rewriteValueAMD64_OpMaskedGreaterEqualUint8x64(v) - case OpMaskedGreaterFloat32x16: - return rewriteValueAMD64_OpMaskedGreaterFloat32x16(v) - case OpMaskedGreaterFloat32x4: - return rewriteValueAMD64_OpMaskedGreaterFloat32x4(v) - case OpMaskedGreaterFloat32x8: - return rewriteValueAMD64_OpMaskedGreaterFloat32x8(v) - case OpMaskedGreaterFloat64x2: - return rewriteValueAMD64_OpMaskedGreaterFloat64x2(v) - case OpMaskedGreaterFloat64x4: - return rewriteValueAMD64_OpMaskedGreaterFloat64x4(v) - case OpMaskedGreaterFloat64x8: - return rewriteValueAMD64_OpMaskedGreaterFloat64x8(v) - case OpMaskedGreaterInt16x16: - return rewriteValueAMD64_OpMaskedGreaterInt16x16(v) - case OpMaskedGreaterInt16x32: - return rewriteValueAMD64_OpMaskedGreaterInt16x32(v) - case OpMaskedGreaterInt16x8: - return rewriteValueAMD64_OpMaskedGreaterInt16x8(v) - case OpMaskedGreaterInt32x16: - return rewriteValueAMD64_OpMaskedGreaterInt32x16(v) - case OpMaskedGreaterInt32x4: - return rewriteValueAMD64_OpMaskedGreaterInt32x4(v) - case OpMaskedGreaterInt32x8: - return rewriteValueAMD64_OpMaskedGreaterInt32x8(v) - case OpMaskedGreaterInt64x2: - return rewriteValueAMD64_OpMaskedGreaterInt64x2(v) - case OpMaskedGreaterInt64x4: - return rewriteValueAMD64_OpMaskedGreaterInt64x4(v) - case OpMaskedGreaterInt64x8: - return rewriteValueAMD64_OpMaskedGreaterInt64x8(v) - case OpMaskedGreaterInt8x16: - return rewriteValueAMD64_OpMaskedGreaterInt8x16(v) - case OpMaskedGreaterInt8x32: - return rewriteValueAMD64_OpMaskedGreaterInt8x32(v) - case OpMaskedGreaterInt8x64: - return rewriteValueAMD64_OpMaskedGreaterInt8x64(v) - case OpMaskedGreaterUint16x16: - return rewriteValueAMD64_OpMaskedGreaterUint16x16(v) - case OpMaskedGreaterUint16x32: - return rewriteValueAMD64_OpMaskedGreaterUint16x32(v) - case OpMaskedGreaterUint16x8: - return rewriteValueAMD64_OpMaskedGreaterUint16x8(v) - case OpMaskedGreaterUint32x16: - return rewriteValueAMD64_OpMaskedGreaterUint32x16(v) - case OpMaskedGreaterUint32x4: - return rewriteValueAMD64_OpMaskedGreaterUint32x4(v) - case OpMaskedGreaterUint32x8: - return rewriteValueAMD64_OpMaskedGreaterUint32x8(v) - case OpMaskedGreaterUint64x2: - return rewriteValueAMD64_OpMaskedGreaterUint64x2(v) - case OpMaskedGreaterUint64x4: - return rewriteValueAMD64_OpMaskedGreaterUint64x4(v) - case OpMaskedGreaterUint64x8: - return rewriteValueAMD64_OpMaskedGreaterUint64x8(v) - case OpMaskedGreaterUint8x16: - return rewriteValueAMD64_OpMaskedGreaterUint8x16(v) - case OpMaskedGreaterUint8x32: - return rewriteValueAMD64_OpMaskedGreaterUint8x32(v) - case OpMaskedGreaterUint8x64: - return rewriteValueAMD64_OpMaskedGreaterUint8x64(v) - case OpMaskedIsNanFloat32x16: - return rewriteValueAMD64_OpMaskedIsNanFloat32x16(v) - case OpMaskedIsNanFloat32x4: - return rewriteValueAMD64_OpMaskedIsNanFloat32x4(v) - case OpMaskedIsNanFloat32x8: - return rewriteValueAMD64_OpMaskedIsNanFloat32x8(v) - case OpMaskedIsNanFloat64x2: - return rewriteValueAMD64_OpMaskedIsNanFloat64x2(v) - case OpMaskedIsNanFloat64x4: - return rewriteValueAMD64_OpMaskedIsNanFloat64x4(v) - case OpMaskedIsNanFloat64x8: - return rewriteValueAMD64_OpMaskedIsNanFloat64x8(v) - case OpMaskedLessEqualFloat32x16: - return rewriteValueAMD64_OpMaskedLessEqualFloat32x16(v) - case OpMaskedLessEqualFloat32x4: - return rewriteValueAMD64_OpMaskedLessEqualFloat32x4(v) - case OpMaskedLessEqualFloat32x8: - 
return rewriteValueAMD64_OpMaskedLessEqualFloat32x8(v) - case OpMaskedLessEqualFloat64x2: - return rewriteValueAMD64_OpMaskedLessEqualFloat64x2(v) - case OpMaskedLessEqualFloat64x4: - return rewriteValueAMD64_OpMaskedLessEqualFloat64x4(v) - case OpMaskedLessEqualFloat64x8: - return rewriteValueAMD64_OpMaskedLessEqualFloat64x8(v) - case OpMaskedLessEqualInt16x16: - return rewriteValueAMD64_OpMaskedLessEqualInt16x16(v) - case OpMaskedLessEqualInt16x32: - return rewriteValueAMD64_OpMaskedLessEqualInt16x32(v) - case OpMaskedLessEqualInt16x8: - return rewriteValueAMD64_OpMaskedLessEqualInt16x8(v) - case OpMaskedLessEqualInt32x16: - return rewriteValueAMD64_OpMaskedLessEqualInt32x16(v) - case OpMaskedLessEqualInt32x4: - return rewriteValueAMD64_OpMaskedLessEqualInt32x4(v) - case OpMaskedLessEqualInt32x8: - return rewriteValueAMD64_OpMaskedLessEqualInt32x8(v) - case OpMaskedLessEqualInt64x2: - return rewriteValueAMD64_OpMaskedLessEqualInt64x2(v) - case OpMaskedLessEqualInt64x4: - return rewriteValueAMD64_OpMaskedLessEqualInt64x4(v) - case OpMaskedLessEqualInt64x8: - return rewriteValueAMD64_OpMaskedLessEqualInt64x8(v) - case OpMaskedLessEqualInt8x16: - return rewriteValueAMD64_OpMaskedLessEqualInt8x16(v) - case OpMaskedLessEqualInt8x32: - return rewriteValueAMD64_OpMaskedLessEqualInt8x32(v) - case OpMaskedLessEqualInt8x64: - return rewriteValueAMD64_OpMaskedLessEqualInt8x64(v) - case OpMaskedLessEqualUint16x16: - return rewriteValueAMD64_OpMaskedLessEqualUint16x16(v) - case OpMaskedLessEqualUint16x32: - return rewriteValueAMD64_OpMaskedLessEqualUint16x32(v) - case OpMaskedLessEqualUint16x8: - return rewriteValueAMD64_OpMaskedLessEqualUint16x8(v) - case OpMaskedLessEqualUint32x16: - return rewriteValueAMD64_OpMaskedLessEqualUint32x16(v) - case OpMaskedLessEqualUint32x4: - return rewriteValueAMD64_OpMaskedLessEqualUint32x4(v) - case OpMaskedLessEqualUint32x8: - return rewriteValueAMD64_OpMaskedLessEqualUint32x8(v) - case OpMaskedLessEqualUint64x2: - return rewriteValueAMD64_OpMaskedLessEqualUint64x2(v) - case OpMaskedLessEqualUint64x4: - return rewriteValueAMD64_OpMaskedLessEqualUint64x4(v) - case OpMaskedLessEqualUint64x8: - return rewriteValueAMD64_OpMaskedLessEqualUint64x8(v) - case OpMaskedLessEqualUint8x16: - return rewriteValueAMD64_OpMaskedLessEqualUint8x16(v) - case OpMaskedLessEqualUint8x32: - return rewriteValueAMD64_OpMaskedLessEqualUint8x32(v) - case OpMaskedLessEqualUint8x64: - return rewriteValueAMD64_OpMaskedLessEqualUint8x64(v) - case OpMaskedLessFloat32x16: - return rewriteValueAMD64_OpMaskedLessFloat32x16(v) - case OpMaskedLessFloat32x4: - return rewriteValueAMD64_OpMaskedLessFloat32x4(v) - case OpMaskedLessFloat32x8: - return rewriteValueAMD64_OpMaskedLessFloat32x8(v) - case OpMaskedLessFloat64x2: - return rewriteValueAMD64_OpMaskedLessFloat64x2(v) - case OpMaskedLessFloat64x4: - return rewriteValueAMD64_OpMaskedLessFloat64x4(v) - case OpMaskedLessFloat64x8: - return rewriteValueAMD64_OpMaskedLessFloat64x8(v) - case OpMaskedLessInt16x16: - return rewriteValueAMD64_OpMaskedLessInt16x16(v) - case OpMaskedLessInt16x32: - return rewriteValueAMD64_OpMaskedLessInt16x32(v) - case OpMaskedLessInt16x8: - return rewriteValueAMD64_OpMaskedLessInt16x8(v) - case OpMaskedLessInt32x16: - return rewriteValueAMD64_OpMaskedLessInt32x16(v) - case OpMaskedLessInt32x4: - return rewriteValueAMD64_OpMaskedLessInt32x4(v) - case OpMaskedLessInt32x8: - return rewriteValueAMD64_OpMaskedLessInt32x8(v) - case OpMaskedLessInt64x2: - return rewriteValueAMD64_OpMaskedLessInt64x2(v) - case OpMaskedLessInt64x4: - 
return rewriteValueAMD64_OpMaskedLessInt64x4(v) - case OpMaskedLessInt64x8: - return rewriteValueAMD64_OpMaskedLessInt64x8(v) - case OpMaskedLessInt8x16: - return rewriteValueAMD64_OpMaskedLessInt8x16(v) - case OpMaskedLessInt8x32: - return rewriteValueAMD64_OpMaskedLessInt8x32(v) - case OpMaskedLessInt8x64: - return rewriteValueAMD64_OpMaskedLessInt8x64(v) - case OpMaskedLessUint16x16: - return rewriteValueAMD64_OpMaskedLessUint16x16(v) - case OpMaskedLessUint16x32: - return rewriteValueAMD64_OpMaskedLessUint16x32(v) - case OpMaskedLessUint16x8: - return rewriteValueAMD64_OpMaskedLessUint16x8(v) - case OpMaskedLessUint32x16: - return rewriteValueAMD64_OpMaskedLessUint32x16(v) - case OpMaskedLessUint32x4: - return rewriteValueAMD64_OpMaskedLessUint32x4(v) - case OpMaskedLessUint32x8: - return rewriteValueAMD64_OpMaskedLessUint32x8(v) - case OpMaskedLessUint64x2: - return rewriteValueAMD64_OpMaskedLessUint64x2(v) - case OpMaskedLessUint64x4: - return rewriteValueAMD64_OpMaskedLessUint64x4(v) - case OpMaskedLessUint64x8: - return rewriteValueAMD64_OpMaskedLessUint64x8(v) - case OpMaskedLessUint8x16: - return rewriteValueAMD64_OpMaskedLessUint8x16(v) - case OpMaskedLessUint8x32: - return rewriteValueAMD64_OpMaskedLessUint8x32(v) - case OpMaskedLessUint8x64: - return rewriteValueAMD64_OpMaskedLessUint8x64(v) - case OpMaskedMaxFloat32x16: - return rewriteValueAMD64_OpMaskedMaxFloat32x16(v) - case OpMaskedMaxFloat32x4: - return rewriteValueAMD64_OpMaskedMaxFloat32x4(v) - case OpMaskedMaxFloat32x8: - return rewriteValueAMD64_OpMaskedMaxFloat32x8(v) - case OpMaskedMaxFloat64x2: - return rewriteValueAMD64_OpMaskedMaxFloat64x2(v) - case OpMaskedMaxFloat64x4: - return rewriteValueAMD64_OpMaskedMaxFloat64x4(v) - case OpMaskedMaxFloat64x8: - return rewriteValueAMD64_OpMaskedMaxFloat64x8(v) - case OpMaskedMaxInt16x16: - return rewriteValueAMD64_OpMaskedMaxInt16x16(v) - case OpMaskedMaxInt16x32: - return rewriteValueAMD64_OpMaskedMaxInt16x32(v) - case OpMaskedMaxInt16x8: - return rewriteValueAMD64_OpMaskedMaxInt16x8(v) - case OpMaskedMaxInt32x16: - return rewriteValueAMD64_OpMaskedMaxInt32x16(v) - case OpMaskedMaxInt32x4: - return rewriteValueAMD64_OpMaskedMaxInt32x4(v) - case OpMaskedMaxInt32x8: - return rewriteValueAMD64_OpMaskedMaxInt32x8(v) - case OpMaskedMaxInt64x2: - return rewriteValueAMD64_OpMaskedMaxInt64x2(v) - case OpMaskedMaxInt64x4: - return rewriteValueAMD64_OpMaskedMaxInt64x4(v) - case OpMaskedMaxInt64x8: - return rewriteValueAMD64_OpMaskedMaxInt64x8(v) - case OpMaskedMaxInt8x16: - return rewriteValueAMD64_OpMaskedMaxInt8x16(v) - case OpMaskedMaxInt8x32: - return rewriteValueAMD64_OpMaskedMaxInt8x32(v) - case OpMaskedMaxInt8x64: - return rewriteValueAMD64_OpMaskedMaxInt8x64(v) - case OpMaskedMaxUint16x16: - return rewriteValueAMD64_OpMaskedMaxUint16x16(v) - case OpMaskedMaxUint16x32: - return rewriteValueAMD64_OpMaskedMaxUint16x32(v) - case OpMaskedMaxUint16x8: - return rewriteValueAMD64_OpMaskedMaxUint16x8(v) - case OpMaskedMaxUint32x16: - return rewriteValueAMD64_OpMaskedMaxUint32x16(v) - case OpMaskedMaxUint32x4: - return rewriteValueAMD64_OpMaskedMaxUint32x4(v) - case OpMaskedMaxUint32x8: - return rewriteValueAMD64_OpMaskedMaxUint32x8(v) - case OpMaskedMaxUint64x2: - return rewriteValueAMD64_OpMaskedMaxUint64x2(v) - case OpMaskedMaxUint64x4: - return rewriteValueAMD64_OpMaskedMaxUint64x4(v) - case OpMaskedMaxUint64x8: - return rewriteValueAMD64_OpMaskedMaxUint64x8(v) - case OpMaskedMaxUint8x16: - return rewriteValueAMD64_OpMaskedMaxUint8x16(v) - case OpMaskedMaxUint8x32: - return 
rewriteValueAMD64_OpMaskedMaxUint8x32(v) - case OpMaskedMaxUint8x64: - return rewriteValueAMD64_OpMaskedMaxUint8x64(v) - case OpMaskedMinFloat32x16: - return rewriteValueAMD64_OpMaskedMinFloat32x16(v) - case OpMaskedMinFloat32x4: - return rewriteValueAMD64_OpMaskedMinFloat32x4(v) - case OpMaskedMinFloat32x8: - return rewriteValueAMD64_OpMaskedMinFloat32x8(v) - case OpMaskedMinFloat64x2: - return rewriteValueAMD64_OpMaskedMinFloat64x2(v) - case OpMaskedMinFloat64x4: - return rewriteValueAMD64_OpMaskedMinFloat64x4(v) - case OpMaskedMinFloat64x8: - return rewriteValueAMD64_OpMaskedMinFloat64x8(v) - case OpMaskedMinInt16x16: - return rewriteValueAMD64_OpMaskedMinInt16x16(v) - case OpMaskedMinInt16x32: - return rewriteValueAMD64_OpMaskedMinInt16x32(v) - case OpMaskedMinInt16x8: - return rewriteValueAMD64_OpMaskedMinInt16x8(v) - case OpMaskedMinInt32x16: - return rewriteValueAMD64_OpMaskedMinInt32x16(v) - case OpMaskedMinInt32x4: - return rewriteValueAMD64_OpMaskedMinInt32x4(v) - case OpMaskedMinInt32x8: - return rewriteValueAMD64_OpMaskedMinInt32x8(v) - case OpMaskedMinInt64x2: - return rewriteValueAMD64_OpMaskedMinInt64x2(v) - case OpMaskedMinInt64x4: - return rewriteValueAMD64_OpMaskedMinInt64x4(v) - case OpMaskedMinInt64x8: - return rewriteValueAMD64_OpMaskedMinInt64x8(v) - case OpMaskedMinInt8x16: - return rewriteValueAMD64_OpMaskedMinInt8x16(v) - case OpMaskedMinInt8x32: - return rewriteValueAMD64_OpMaskedMinInt8x32(v) - case OpMaskedMinInt8x64: - return rewriteValueAMD64_OpMaskedMinInt8x64(v) - case OpMaskedMinUint16x16: - return rewriteValueAMD64_OpMaskedMinUint16x16(v) - case OpMaskedMinUint16x32: - return rewriteValueAMD64_OpMaskedMinUint16x32(v) - case OpMaskedMinUint16x8: - return rewriteValueAMD64_OpMaskedMinUint16x8(v) - case OpMaskedMinUint32x16: - return rewriteValueAMD64_OpMaskedMinUint32x16(v) - case OpMaskedMinUint32x4: - return rewriteValueAMD64_OpMaskedMinUint32x4(v) - case OpMaskedMinUint32x8: - return rewriteValueAMD64_OpMaskedMinUint32x8(v) - case OpMaskedMinUint64x2: - return rewriteValueAMD64_OpMaskedMinUint64x2(v) - case OpMaskedMinUint64x4: - return rewriteValueAMD64_OpMaskedMinUint64x4(v) - case OpMaskedMinUint64x8: - return rewriteValueAMD64_OpMaskedMinUint64x8(v) - case OpMaskedMinUint8x16: - return rewriteValueAMD64_OpMaskedMinUint8x16(v) - case OpMaskedMinUint8x32: - return rewriteValueAMD64_OpMaskedMinUint8x32(v) - case OpMaskedMinUint8x64: - return rewriteValueAMD64_OpMaskedMinUint8x64(v) - case OpMaskedMulByPowOf2Float32x16: - return rewriteValueAMD64_OpMaskedMulByPowOf2Float32x16(v) - case OpMaskedMulByPowOf2Float32x4: - return rewriteValueAMD64_OpMaskedMulByPowOf2Float32x4(v) - case OpMaskedMulByPowOf2Float32x8: - return rewriteValueAMD64_OpMaskedMulByPowOf2Float32x8(v) - case OpMaskedMulByPowOf2Float64x2: - return rewriteValueAMD64_OpMaskedMulByPowOf2Float64x2(v) - case OpMaskedMulByPowOf2Float64x4: - return rewriteValueAMD64_OpMaskedMulByPowOf2Float64x4(v) - case OpMaskedMulByPowOf2Float64x8: - return rewriteValueAMD64_OpMaskedMulByPowOf2Float64x8(v) - case OpMaskedMulEvenWidenInt64x2: - return rewriteValueAMD64_OpMaskedMulEvenWidenInt64x2(v) - case OpMaskedMulEvenWidenInt64x4: - return rewriteValueAMD64_OpMaskedMulEvenWidenInt64x4(v) - case OpMaskedMulEvenWidenInt64x8: - return rewriteValueAMD64_OpMaskedMulEvenWidenInt64x8(v) - case OpMaskedMulEvenWidenUint64x2: - return rewriteValueAMD64_OpMaskedMulEvenWidenUint64x2(v) - case OpMaskedMulEvenWidenUint64x4: - return rewriteValueAMD64_OpMaskedMulEvenWidenUint64x4(v) - case OpMaskedMulEvenWidenUint64x8: - 
return rewriteValueAMD64_OpMaskedMulEvenWidenUint64x8(v) - case OpMaskedMulFloat32x16: - return rewriteValueAMD64_OpMaskedMulFloat32x16(v) - case OpMaskedMulFloat32x4: - return rewriteValueAMD64_OpMaskedMulFloat32x4(v) - case OpMaskedMulFloat32x8: - return rewriteValueAMD64_OpMaskedMulFloat32x8(v) - case OpMaskedMulFloat64x2: - return rewriteValueAMD64_OpMaskedMulFloat64x2(v) - case OpMaskedMulFloat64x4: - return rewriteValueAMD64_OpMaskedMulFloat64x4(v) - case OpMaskedMulFloat64x8: - return rewriteValueAMD64_OpMaskedMulFloat64x8(v) - case OpMaskedMulHighInt16x16: - return rewriteValueAMD64_OpMaskedMulHighInt16x16(v) - case OpMaskedMulHighInt16x32: - return rewriteValueAMD64_OpMaskedMulHighInt16x32(v) - case OpMaskedMulHighInt16x8: - return rewriteValueAMD64_OpMaskedMulHighInt16x8(v) - case OpMaskedMulHighUint16x16: - return rewriteValueAMD64_OpMaskedMulHighUint16x16(v) - case OpMaskedMulHighUint16x32: - return rewriteValueAMD64_OpMaskedMulHighUint16x32(v) - case OpMaskedMulHighUint16x8: - return rewriteValueAMD64_OpMaskedMulHighUint16x8(v) - case OpMaskedMulLowInt16x16: - return rewriteValueAMD64_OpMaskedMulLowInt16x16(v) - case OpMaskedMulLowInt16x32: - return rewriteValueAMD64_OpMaskedMulLowInt16x32(v) - case OpMaskedMulLowInt16x8: - return rewriteValueAMD64_OpMaskedMulLowInt16x8(v) - case OpMaskedMulLowInt32x16: - return rewriteValueAMD64_OpMaskedMulLowInt32x16(v) - case OpMaskedMulLowInt32x4: - return rewriteValueAMD64_OpMaskedMulLowInt32x4(v) - case OpMaskedMulLowInt32x8: - return rewriteValueAMD64_OpMaskedMulLowInt32x8(v) - case OpMaskedMulLowInt64x2: - return rewriteValueAMD64_OpMaskedMulLowInt64x2(v) - case OpMaskedMulLowInt64x4: - return rewriteValueAMD64_OpMaskedMulLowInt64x4(v) - case OpMaskedMulLowInt64x8: - return rewriteValueAMD64_OpMaskedMulLowInt64x8(v) - case OpMaskedNotEqualFloat32x16: - return rewriteValueAMD64_OpMaskedNotEqualFloat32x16(v) - case OpMaskedNotEqualFloat32x4: - return rewriteValueAMD64_OpMaskedNotEqualFloat32x4(v) - case OpMaskedNotEqualFloat32x8: - return rewriteValueAMD64_OpMaskedNotEqualFloat32x8(v) - case OpMaskedNotEqualFloat64x2: - return rewriteValueAMD64_OpMaskedNotEqualFloat64x2(v) - case OpMaskedNotEqualFloat64x4: - return rewriteValueAMD64_OpMaskedNotEqualFloat64x4(v) - case OpMaskedNotEqualFloat64x8: - return rewriteValueAMD64_OpMaskedNotEqualFloat64x8(v) - case OpMaskedNotEqualInt16x16: - return rewriteValueAMD64_OpMaskedNotEqualInt16x16(v) - case OpMaskedNotEqualInt16x32: - return rewriteValueAMD64_OpMaskedNotEqualInt16x32(v) - case OpMaskedNotEqualInt16x8: - return rewriteValueAMD64_OpMaskedNotEqualInt16x8(v) - case OpMaskedNotEqualInt32x16: - return rewriteValueAMD64_OpMaskedNotEqualInt32x16(v) - case OpMaskedNotEqualInt32x4: - return rewriteValueAMD64_OpMaskedNotEqualInt32x4(v) - case OpMaskedNotEqualInt32x8: - return rewriteValueAMD64_OpMaskedNotEqualInt32x8(v) - case OpMaskedNotEqualInt64x2: - return rewriteValueAMD64_OpMaskedNotEqualInt64x2(v) - case OpMaskedNotEqualInt64x4: - return rewriteValueAMD64_OpMaskedNotEqualInt64x4(v) - case OpMaskedNotEqualInt64x8: - return rewriteValueAMD64_OpMaskedNotEqualInt64x8(v) - case OpMaskedNotEqualInt8x16: - return rewriteValueAMD64_OpMaskedNotEqualInt8x16(v) - case OpMaskedNotEqualInt8x32: - return rewriteValueAMD64_OpMaskedNotEqualInt8x32(v) - case OpMaskedNotEqualInt8x64: - return rewriteValueAMD64_OpMaskedNotEqualInt8x64(v) - case OpMaskedNotEqualUint16x16: - return rewriteValueAMD64_OpMaskedNotEqualUint16x16(v) - case OpMaskedNotEqualUint16x32: - return 
rewriteValueAMD64_OpMaskedNotEqualUint16x32(v) - case OpMaskedNotEqualUint16x8: - return rewriteValueAMD64_OpMaskedNotEqualUint16x8(v) - case OpMaskedNotEqualUint32x16: - return rewriteValueAMD64_OpMaskedNotEqualUint32x16(v) - case OpMaskedNotEqualUint32x4: - return rewriteValueAMD64_OpMaskedNotEqualUint32x4(v) - case OpMaskedNotEqualUint32x8: - return rewriteValueAMD64_OpMaskedNotEqualUint32x8(v) - case OpMaskedNotEqualUint64x2: - return rewriteValueAMD64_OpMaskedNotEqualUint64x2(v) - case OpMaskedNotEqualUint64x4: - return rewriteValueAMD64_OpMaskedNotEqualUint64x4(v) - case OpMaskedNotEqualUint64x8: - return rewriteValueAMD64_OpMaskedNotEqualUint64x8(v) - case OpMaskedNotEqualUint8x16: - return rewriteValueAMD64_OpMaskedNotEqualUint8x16(v) - case OpMaskedNotEqualUint8x32: - return rewriteValueAMD64_OpMaskedNotEqualUint8x32(v) - case OpMaskedNotEqualUint8x64: - return rewriteValueAMD64_OpMaskedNotEqualUint8x64(v) - case OpMaskedOrInt32x16: - return rewriteValueAMD64_OpMaskedOrInt32x16(v) - case OpMaskedOrInt32x4: - return rewriteValueAMD64_OpMaskedOrInt32x4(v) - case OpMaskedOrInt32x8: - return rewriteValueAMD64_OpMaskedOrInt32x8(v) - case OpMaskedOrInt64x2: - return rewriteValueAMD64_OpMaskedOrInt64x2(v) - case OpMaskedOrInt64x4: - return rewriteValueAMD64_OpMaskedOrInt64x4(v) - case OpMaskedOrInt64x8: - return rewriteValueAMD64_OpMaskedOrInt64x8(v) - case OpMaskedOrUint32x16: - return rewriteValueAMD64_OpMaskedOrUint32x16(v) - case OpMaskedOrUint32x4: - return rewriteValueAMD64_OpMaskedOrUint32x4(v) - case OpMaskedOrUint32x8: - return rewriteValueAMD64_OpMaskedOrUint32x8(v) - case OpMaskedOrUint64x2: - return rewriteValueAMD64_OpMaskedOrUint64x2(v) - case OpMaskedOrUint64x4: - return rewriteValueAMD64_OpMaskedOrUint64x4(v) - case OpMaskedOrUint64x8: - return rewriteValueAMD64_OpMaskedOrUint64x8(v) - case OpMaskedPairDotProdAccumulateInt32x16: - return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x16(v) - case OpMaskedPairDotProdAccumulateInt32x4: - return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x4(v) - case OpMaskedPairDotProdAccumulateInt32x8: - return rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x8(v) - case OpMaskedPairDotProdInt16x16: - return rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v) - case OpMaskedPairDotProdInt16x32: - return rewriteValueAMD64_OpMaskedPairDotProdInt16x32(v) - case OpMaskedPairDotProdInt16x8: - return rewriteValueAMD64_OpMaskedPairDotProdInt16x8(v) - case OpMaskedPopCountInt16x16: - return rewriteValueAMD64_OpMaskedPopCountInt16x16(v) - case OpMaskedPopCountInt16x32: - return rewriteValueAMD64_OpMaskedPopCountInt16x32(v) - case OpMaskedPopCountInt16x8: - return rewriteValueAMD64_OpMaskedPopCountInt16x8(v) - case OpMaskedPopCountInt32x16: - return rewriteValueAMD64_OpMaskedPopCountInt32x16(v) - case OpMaskedPopCountInt32x4: - return rewriteValueAMD64_OpMaskedPopCountInt32x4(v) - case OpMaskedPopCountInt32x8: - return rewriteValueAMD64_OpMaskedPopCountInt32x8(v) - case OpMaskedPopCountInt64x2: - return rewriteValueAMD64_OpMaskedPopCountInt64x2(v) - case OpMaskedPopCountInt64x4: - return rewriteValueAMD64_OpMaskedPopCountInt64x4(v) - case OpMaskedPopCountInt64x8: - return rewriteValueAMD64_OpMaskedPopCountInt64x8(v) - case OpMaskedPopCountInt8x16: - return rewriteValueAMD64_OpMaskedPopCountInt8x16(v) - case OpMaskedPopCountInt8x32: - return rewriteValueAMD64_OpMaskedPopCountInt8x32(v) - case OpMaskedPopCountInt8x64: - return rewriteValueAMD64_OpMaskedPopCountInt8x64(v) - case OpMaskedPopCountUint16x16: - return 
rewriteValueAMD64_OpMaskedPopCountUint16x16(v) - case OpMaskedPopCountUint16x32: - return rewriteValueAMD64_OpMaskedPopCountUint16x32(v) - case OpMaskedPopCountUint16x8: - return rewriteValueAMD64_OpMaskedPopCountUint16x8(v) - case OpMaskedPopCountUint32x16: - return rewriteValueAMD64_OpMaskedPopCountUint32x16(v) - case OpMaskedPopCountUint32x4: - return rewriteValueAMD64_OpMaskedPopCountUint32x4(v) - case OpMaskedPopCountUint32x8: - return rewriteValueAMD64_OpMaskedPopCountUint32x8(v) - case OpMaskedPopCountUint64x2: - return rewriteValueAMD64_OpMaskedPopCountUint64x2(v) - case OpMaskedPopCountUint64x4: - return rewriteValueAMD64_OpMaskedPopCountUint64x4(v) - case OpMaskedPopCountUint64x8: - return rewriteValueAMD64_OpMaskedPopCountUint64x8(v) - case OpMaskedPopCountUint8x16: - return rewriteValueAMD64_OpMaskedPopCountUint8x16(v) - case OpMaskedPopCountUint8x32: - return rewriteValueAMD64_OpMaskedPopCountUint8x32(v) - case OpMaskedPopCountUint8x64: - return rewriteValueAMD64_OpMaskedPopCountUint8x64(v) - case OpMaskedRotateAllLeftInt32x16: - return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x16(v) - case OpMaskedRotateAllLeftInt32x4: - return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x4(v) - case OpMaskedRotateAllLeftInt32x8: - return rewriteValueAMD64_OpMaskedRotateAllLeftInt32x8(v) - case OpMaskedRotateAllLeftInt64x2: - return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x2(v) - case OpMaskedRotateAllLeftInt64x4: - return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x4(v) - case OpMaskedRotateAllLeftInt64x8: - return rewriteValueAMD64_OpMaskedRotateAllLeftInt64x8(v) - case OpMaskedRotateAllLeftUint32x16: - return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x16(v) - case OpMaskedRotateAllLeftUint32x4: - return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x4(v) - case OpMaskedRotateAllLeftUint32x8: - return rewriteValueAMD64_OpMaskedRotateAllLeftUint32x8(v) - case OpMaskedRotateAllLeftUint64x2: - return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x2(v) - case OpMaskedRotateAllLeftUint64x4: - return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x4(v) - case OpMaskedRotateAllLeftUint64x8: - return rewriteValueAMD64_OpMaskedRotateAllLeftUint64x8(v) - case OpMaskedRotateAllRightInt32x16: - return rewriteValueAMD64_OpMaskedRotateAllRightInt32x16(v) - case OpMaskedRotateAllRightInt32x4: - return rewriteValueAMD64_OpMaskedRotateAllRightInt32x4(v) - case OpMaskedRotateAllRightInt32x8: - return rewriteValueAMD64_OpMaskedRotateAllRightInt32x8(v) - case OpMaskedRotateAllRightInt64x2: - return rewriteValueAMD64_OpMaskedRotateAllRightInt64x2(v) - case OpMaskedRotateAllRightInt64x4: - return rewriteValueAMD64_OpMaskedRotateAllRightInt64x4(v) - case OpMaskedRotateAllRightInt64x8: - return rewriteValueAMD64_OpMaskedRotateAllRightInt64x8(v) - case OpMaskedRotateAllRightUint32x16: - return rewriteValueAMD64_OpMaskedRotateAllRightUint32x16(v) - case OpMaskedRotateAllRightUint32x4: - return rewriteValueAMD64_OpMaskedRotateAllRightUint32x4(v) - case OpMaskedRotateAllRightUint32x8: - return rewriteValueAMD64_OpMaskedRotateAllRightUint32x8(v) - case OpMaskedRotateAllRightUint64x2: - return rewriteValueAMD64_OpMaskedRotateAllRightUint64x2(v) - case OpMaskedRotateAllRightUint64x4: - return rewriteValueAMD64_OpMaskedRotateAllRightUint64x4(v) - case OpMaskedRotateAllRightUint64x8: - return rewriteValueAMD64_OpMaskedRotateAllRightUint64x8(v) - case OpMaskedRotateLeftInt32x16: - return rewriteValueAMD64_OpMaskedRotateLeftInt32x16(v) - case OpMaskedRotateLeftInt32x4: - return 
rewriteValueAMD64_OpMaskedRotateLeftInt32x4(v) - case OpMaskedRotateLeftInt32x8: - return rewriteValueAMD64_OpMaskedRotateLeftInt32x8(v) - case OpMaskedRotateLeftInt64x2: - return rewriteValueAMD64_OpMaskedRotateLeftInt64x2(v) - case OpMaskedRotateLeftInt64x4: - return rewriteValueAMD64_OpMaskedRotateLeftInt64x4(v) - case OpMaskedRotateLeftInt64x8: - return rewriteValueAMD64_OpMaskedRotateLeftInt64x8(v) - case OpMaskedRotateLeftUint32x16: - return rewriteValueAMD64_OpMaskedRotateLeftUint32x16(v) - case OpMaskedRotateLeftUint32x4: - return rewriteValueAMD64_OpMaskedRotateLeftUint32x4(v) - case OpMaskedRotateLeftUint32x8: - return rewriteValueAMD64_OpMaskedRotateLeftUint32x8(v) - case OpMaskedRotateLeftUint64x2: - return rewriteValueAMD64_OpMaskedRotateLeftUint64x2(v) - case OpMaskedRotateLeftUint64x4: - return rewriteValueAMD64_OpMaskedRotateLeftUint64x4(v) - case OpMaskedRotateLeftUint64x8: - return rewriteValueAMD64_OpMaskedRotateLeftUint64x8(v) - case OpMaskedRotateRightInt32x16: - return rewriteValueAMD64_OpMaskedRotateRightInt32x16(v) - case OpMaskedRotateRightInt32x4: - return rewriteValueAMD64_OpMaskedRotateRightInt32x4(v) - case OpMaskedRotateRightInt32x8: - return rewriteValueAMD64_OpMaskedRotateRightInt32x8(v) - case OpMaskedRotateRightInt64x2: - return rewriteValueAMD64_OpMaskedRotateRightInt64x2(v) - case OpMaskedRotateRightInt64x4: - return rewriteValueAMD64_OpMaskedRotateRightInt64x4(v) - case OpMaskedRotateRightInt64x8: - return rewriteValueAMD64_OpMaskedRotateRightInt64x8(v) - case OpMaskedRotateRightUint32x16: - return rewriteValueAMD64_OpMaskedRotateRightUint32x16(v) - case OpMaskedRotateRightUint32x4: - return rewriteValueAMD64_OpMaskedRotateRightUint32x4(v) - case OpMaskedRotateRightUint32x8: - return rewriteValueAMD64_OpMaskedRotateRightUint32x8(v) - case OpMaskedRotateRightUint64x2: - return rewriteValueAMD64_OpMaskedRotateRightUint64x2(v) - case OpMaskedRotateRightUint64x4: - return rewriteValueAMD64_OpMaskedRotateRightUint64x4(v) - case OpMaskedRotateRightUint64x8: - return rewriteValueAMD64_OpMaskedRotateRightUint64x8(v) - case OpMaskedRoundWithPrecisionFloat32x16: - return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x16(v) - case OpMaskedRoundWithPrecisionFloat32x4: - return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x4(v) - case OpMaskedRoundWithPrecisionFloat32x8: - return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x8(v) - case OpMaskedRoundWithPrecisionFloat64x2: - return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x2(v) - case OpMaskedRoundWithPrecisionFloat64x4: - return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x4(v) - case OpMaskedRoundWithPrecisionFloat64x8: - return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x8(v) - case OpMaskedSaturatedAddInt16x16: - return rewriteValueAMD64_OpMaskedSaturatedAddInt16x16(v) - case OpMaskedSaturatedAddInt16x32: - return rewriteValueAMD64_OpMaskedSaturatedAddInt16x32(v) - case OpMaskedSaturatedAddInt16x8: - return rewriteValueAMD64_OpMaskedSaturatedAddInt16x8(v) - case OpMaskedSaturatedAddInt8x16: - return rewriteValueAMD64_OpMaskedSaturatedAddInt8x16(v) - case OpMaskedSaturatedAddInt8x32: - return rewriteValueAMD64_OpMaskedSaturatedAddInt8x32(v) - case OpMaskedSaturatedAddInt8x64: - return rewriteValueAMD64_OpMaskedSaturatedAddInt8x64(v) - case OpMaskedSaturatedAddUint16x16: - return rewriteValueAMD64_OpMaskedSaturatedAddUint16x16(v) - case OpMaskedSaturatedAddUint16x32: - return rewriteValueAMD64_OpMaskedSaturatedAddUint16x32(v) - case OpMaskedSaturatedAddUint16x8: - return 
rewriteValueAMD64_OpMaskedSaturatedAddUint16x8(v) - case OpMaskedSaturatedAddUint8x16: - return rewriteValueAMD64_OpMaskedSaturatedAddUint8x16(v) - case OpMaskedSaturatedAddUint8x32: - return rewriteValueAMD64_OpMaskedSaturatedAddUint8x32(v) - case OpMaskedSaturatedAddUint8x64: - return rewriteValueAMD64_OpMaskedSaturatedAddUint8x64(v) - case OpMaskedSaturatedPairDotProdAccumulateInt32x16: - return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x16(v) - case OpMaskedSaturatedPairDotProdAccumulateInt32x4: - return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x4(v) - case OpMaskedSaturatedPairDotProdAccumulateInt32x8: - return rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x8(v) - case OpMaskedSaturatedSubInt16x16: - return rewriteValueAMD64_OpMaskedSaturatedSubInt16x16(v) - case OpMaskedSaturatedSubInt16x32: - return rewriteValueAMD64_OpMaskedSaturatedSubInt16x32(v) - case OpMaskedSaturatedSubInt16x8: - return rewriteValueAMD64_OpMaskedSaturatedSubInt16x8(v) - case OpMaskedSaturatedSubInt8x16: - return rewriteValueAMD64_OpMaskedSaturatedSubInt8x16(v) - case OpMaskedSaturatedSubInt8x32: - return rewriteValueAMD64_OpMaskedSaturatedSubInt8x32(v) - case OpMaskedSaturatedSubInt8x64: - return rewriteValueAMD64_OpMaskedSaturatedSubInt8x64(v) - case OpMaskedSaturatedSubUint16x16: - return rewriteValueAMD64_OpMaskedSaturatedSubUint16x16(v) - case OpMaskedSaturatedSubUint16x32: - return rewriteValueAMD64_OpMaskedSaturatedSubUint16x32(v) - case OpMaskedSaturatedSubUint16x8: - return rewriteValueAMD64_OpMaskedSaturatedSubUint16x8(v) - case OpMaskedSaturatedSubUint8x16: - return rewriteValueAMD64_OpMaskedSaturatedSubUint8x16(v) - case OpMaskedSaturatedSubUint8x32: - return rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v) - case OpMaskedSaturatedSubUint8x64: - return rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v) - case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16(v) - case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32(v) - case OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64(v) - case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v) - case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4(v) - case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8(v) - case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16(v) - case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4(v) - case OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8: - return rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8(v) - case OpMaskedShiftAllLeftAndFillUpperFromInt16x16: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x16(v) - case OpMaskedShiftAllLeftAndFillUpperFromInt16x32: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x32(v) - case 
OpMaskedShiftAllLeftAndFillUpperFromInt16x8: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x8(v) - case OpMaskedShiftAllLeftAndFillUpperFromInt32x16: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x16(v) - case OpMaskedShiftAllLeftAndFillUpperFromInt32x4: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x4(v) - case OpMaskedShiftAllLeftAndFillUpperFromInt32x8: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x8(v) - case OpMaskedShiftAllLeftAndFillUpperFromInt64x2: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x2(v) - case OpMaskedShiftAllLeftAndFillUpperFromInt64x4: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x4(v) - case OpMaskedShiftAllLeftAndFillUpperFromInt64x8: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x8(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint16x16: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x16(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint16x32: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x32(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint16x8: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x8(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint32x16: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x16(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint32x4: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x4(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint32x8: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x8(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint64x2: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x2(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint64x4: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x4(v) - case OpMaskedShiftAllLeftAndFillUpperFromUint64x8: - return rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x8(v) - case OpMaskedShiftAllLeftInt64x2: - return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x2(v) - case OpMaskedShiftAllLeftInt64x4: - return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x4(v) - case OpMaskedShiftAllLeftInt64x8: - return rewriteValueAMD64_OpMaskedShiftAllLeftInt64x8(v) - case OpMaskedShiftAllLeftUint64x2: - return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x2(v) - case OpMaskedShiftAllLeftUint64x4: - return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x4(v) - case OpMaskedShiftAllLeftUint64x8: - return rewriteValueAMD64_OpMaskedShiftAllLeftUint64x8(v) - case OpMaskedShiftAllRightAndFillUpperFromInt16x16: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x16(v) - case OpMaskedShiftAllRightAndFillUpperFromInt16x32: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x32(v) - case OpMaskedShiftAllRightAndFillUpperFromInt16x8: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x8(v) - case OpMaskedShiftAllRightAndFillUpperFromInt32x16: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x16(v) - case OpMaskedShiftAllRightAndFillUpperFromInt32x4: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x4(v) - case OpMaskedShiftAllRightAndFillUpperFromInt32x8: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x8(v) - case OpMaskedShiftAllRightAndFillUpperFromInt64x2: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x2(v) - case 
OpMaskedShiftAllRightAndFillUpperFromInt64x4: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x4(v) - case OpMaskedShiftAllRightAndFillUpperFromInt64x8: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x8(v) - case OpMaskedShiftAllRightAndFillUpperFromUint16x16: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x16(v) - case OpMaskedShiftAllRightAndFillUpperFromUint16x32: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x32(v) - case OpMaskedShiftAllRightAndFillUpperFromUint16x8: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x8(v) - case OpMaskedShiftAllRightAndFillUpperFromUint32x16: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x16(v) - case OpMaskedShiftAllRightAndFillUpperFromUint32x4: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x4(v) - case OpMaskedShiftAllRightAndFillUpperFromUint32x8: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x8(v) - case OpMaskedShiftAllRightAndFillUpperFromUint64x2: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x2(v) - case OpMaskedShiftAllRightAndFillUpperFromUint64x4: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x4(v) - case OpMaskedShiftAllRightAndFillUpperFromUint64x8: - return rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x8(v) - case OpMaskedShiftAllRightInt64x2: - return rewriteValueAMD64_OpMaskedShiftAllRightInt64x2(v) - case OpMaskedShiftAllRightInt64x4: - return rewriteValueAMD64_OpMaskedShiftAllRightInt64x4(v) - case OpMaskedShiftAllRightInt64x8: - return rewriteValueAMD64_OpMaskedShiftAllRightInt64x8(v) - case OpMaskedShiftAllRightSignExtendedInt64x2: - return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x2(v) - case OpMaskedShiftAllRightSignExtendedInt64x4: - return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x4(v) - case OpMaskedShiftAllRightSignExtendedInt64x8: - return rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x8(v) - case OpMaskedShiftAllRightUint64x2: - return rewriteValueAMD64_OpMaskedShiftAllRightUint64x2(v) - case OpMaskedShiftAllRightUint64x4: - return rewriteValueAMD64_OpMaskedShiftAllRightUint64x4(v) - case OpMaskedShiftAllRightUint64x8: - return rewriteValueAMD64_OpMaskedShiftAllRightUint64x8(v) - case OpMaskedShiftLeftAndFillUpperFromInt16x16: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x16(v) - case OpMaskedShiftLeftAndFillUpperFromInt16x32: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x32(v) - case OpMaskedShiftLeftAndFillUpperFromInt16x8: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x8(v) - case OpMaskedShiftLeftAndFillUpperFromInt32x16: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x16(v) - case OpMaskedShiftLeftAndFillUpperFromInt32x4: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x4(v) - case OpMaskedShiftLeftAndFillUpperFromInt32x8: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x8(v) - case OpMaskedShiftLeftAndFillUpperFromInt64x2: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x2(v) - case OpMaskedShiftLeftAndFillUpperFromInt64x4: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x4(v) - case OpMaskedShiftLeftAndFillUpperFromInt64x8: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x8(v) - case OpMaskedShiftLeftAndFillUpperFromUint16x16: - return 
rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x16(v) - case OpMaskedShiftLeftAndFillUpperFromUint16x32: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x32(v) - case OpMaskedShiftLeftAndFillUpperFromUint16x8: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x8(v) - case OpMaskedShiftLeftAndFillUpperFromUint32x16: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x16(v) - case OpMaskedShiftLeftAndFillUpperFromUint32x4: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x4(v) - case OpMaskedShiftLeftAndFillUpperFromUint32x8: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x8(v) - case OpMaskedShiftLeftAndFillUpperFromUint64x2: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x2(v) - case OpMaskedShiftLeftAndFillUpperFromUint64x4: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x4(v) - case OpMaskedShiftLeftAndFillUpperFromUint64x8: - return rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x8(v) - case OpMaskedShiftLeftInt16x16: - return rewriteValueAMD64_OpMaskedShiftLeftInt16x16(v) - case OpMaskedShiftLeftInt16x32: - return rewriteValueAMD64_OpMaskedShiftLeftInt16x32(v) - case OpMaskedShiftLeftInt16x8: - return rewriteValueAMD64_OpMaskedShiftLeftInt16x8(v) - case OpMaskedShiftLeftInt32x16: - return rewriteValueAMD64_OpMaskedShiftLeftInt32x16(v) - case OpMaskedShiftLeftInt32x4: - return rewriteValueAMD64_OpMaskedShiftLeftInt32x4(v) - case OpMaskedShiftLeftInt32x8: - return rewriteValueAMD64_OpMaskedShiftLeftInt32x8(v) - case OpMaskedShiftLeftInt64x2: - return rewriteValueAMD64_OpMaskedShiftLeftInt64x2(v) - case OpMaskedShiftLeftInt64x4: - return rewriteValueAMD64_OpMaskedShiftLeftInt64x4(v) - case OpMaskedShiftLeftInt64x8: - return rewriteValueAMD64_OpMaskedShiftLeftInt64x8(v) - case OpMaskedShiftLeftUint16x16: - return rewriteValueAMD64_OpMaskedShiftLeftUint16x16(v) - case OpMaskedShiftLeftUint16x32: - return rewriteValueAMD64_OpMaskedShiftLeftUint16x32(v) - case OpMaskedShiftLeftUint16x8: - return rewriteValueAMD64_OpMaskedShiftLeftUint16x8(v) - case OpMaskedShiftLeftUint32x16: - return rewriteValueAMD64_OpMaskedShiftLeftUint32x16(v) - case OpMaskedShiftLeftUint32x4: - return rewriteValueAMD64_OpMaskedShiftLeftUint32x4(v) - case OpMaskedShiftLeftUint32x8: - return rewriteValueAMD64_OpMaskedShiftLeftUint32x8(v) - case OpMaskedShiftLeftUint64x2: - return rewriteValueAMD64_OpMaskedShiftLeftUint64x2(v) - case OpMaskedShiftLeftUint64x4: - return rewriteValueAMD64_OpMaskedShiftLeftUint64x4(v) - case OpMaskedShiftLeftUint64x8: - return rewriteValueAMD64_OpMaskedShiftLeftUint64x8(v) - case OpMaskedShiftRightAndFillUpperFromInt16x16: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x16(v) - case OpMaskedShiftRightAndFillUpperFromInt16x32: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x32(v) - case OpMaskedShiftRightAndFillUpperFromInt16x8: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x8(v) - case OpMaskedShiftRightAndFillUpperFromInt32x16: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x16(v) - case OpMaskedShiftRightAndFillUpperFromInt32x4: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x4(v) - case OpMaskedShiftRightAndFillUpperFromInt32x8: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x8(v) - case OpMaskedShiftRightAndFillUpperFromInt64x2: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x2(v) - case 
OpMaskedShiftRightAndFillUpperFromInt64x4: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x4(v) - case OpMaskedShiftRightAndFillUpperFromInt64x8: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x8(v) - case OpMaskedShiftRightAndFillUpperFromUint16x16: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x16(v) - case OpMaskedShiftRightAndFillUpperFromUint16x32: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x32(v) - case OpMaskedShiftRightAndFillUpperFromUint16x8: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x8(v) - case OpMaskedShiftRightAndFillUpperFromUint32x16: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x16(v) - case OpMaskedShiftRightAndFillUpperFromUint32x4: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x4(v) - case OpMaskedShiftRightAndFillUpperFromUint32x8: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x8(v) - case OpMaskedShiftRightAndFillUpperFromUint64x2: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x2(v) - case OpMaskedShiftRightAndFillUpperFromUint64x4: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x4(v) - case OpMaskedShiftRightAndFillUpperFromUint64x8: - return rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x8(v) - case OpMaskedShiftRightInt16x16: - return rewriteValueAMD64_OpMaskedShiftRightInt16x16(v) - case OpMaskedShiftRightInt16x32: - return rewriteValueAMD64_OpMaskedShiftRightInt16x32(v) - case OpMaskedShiftRightInt16x8: - return rewriteValueAMD64_OpMaskedShiftRightInt16x8(v) - case OpMaskedShiftRightInt32x16: - return rewriteValueAMD64_OpMaskedShiftRightInt32x16(v) - case OpMaskedShiftRightInt32x4: - return rewriteValueAMD64_OpMaskedShiftRightInt32x4(v) - case OpMaskedShiftRightInt32x8: - return rewriteValueAMD64_OpMaskedShiftRightInt32x8(v) - case OpMaskedShiftRightInt64x2: - return rewriteValueAMD64_OpMaskedShiftRightInt64x2(v) - case OpMaskedShiftRightInt64x4: - return rewriteValueAMD64_OpMaskedShiftRightInt64x4(v) - case OpMaskedShiftRightInt64x8: - return rewriteValueAMD64_OpMaskedShiftRightInt64x8(v) - case OpMaskedShiftRightSignExtendedInt16x16: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x16(v) - case OpMaskedShiftRightSignExtendedInt16x32: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x32(v) - case OpMaskedShiftRightSignExtendedInt16x8: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x8(v) - case OpMaskedShiftRightSignExtendedInt32x16: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x16(v) - case OpMaskedShiftRightSignExtendedInt32x4: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x4(v) - case OpMaskedShiftRightSignExtendedInt32x8: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x8(v) - case OpMaskedShiftRightSignExtendedInt64x2: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x2(v) - case OpMaskedShiftRightSignExtendedInt64x4: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x4(v) - case OpMaskedShiftRightSignExtendedInt64x8: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x8(v) - case OpMaskedShiftRightSignExtendedUint16x16: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x16(v) - case OpMaskedShiftRightSignExtendedUint16x32: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x32(v) - case OpMaskedShiftRightSignExtendedUint16x8: - return 
rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x8(v) - case OpMaskedShiftRightSignExtendedUint32x16: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x16(v) - case OpMaskedShiftRightSignExtendedUint32x4: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x4(v) - case OpMaskedShiftRightSignExtendedUint32x8: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x8(v) - case OpMaskedShiftRightSignExtendedUint64x2: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x2(v) - case OpMaskedShiftRightSignExtendedUint64x4: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x4(v) - case OpMaskedShiftRightSignExtendedUint64x8: - return rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x8(v) - case OpMaskedShiftRightUint16x16: - return rewriteValueAMD64_OpMaskedShiftRightUint16x16(v) - case OpMaskedShiftRightUint16x32: - return rewriteValueAMD64_OpMaskedShiftRightUint16x32(v) - case OpMaskedShiftRightUint16x8: - return rewriteValueAMD64_OpMaskedShiftRightUint16x8(v) - case OpMaskedShiftRightUint32x16: - return rewriteValueAMD64_OpMaskedShiftRightUint32x16(v) - case OpMaskedShiftRightUint32x4: - return rewriteValueAMD64_OpMaskedShiftRightUint32x4(v) - case OpMaskedShiftRightUint32x8: - return rewriteValueAMD64_OpMaskedShiftRightUint32x8(v) - case OpMaskedShiftRightUint64x2: - return rewriteValueAMD64_OpMaskedShiftRightUint64x2(v) - case OpMaskedShiftRightUint64x4: - return rewriteValueAMD64_OpMaskedShiftRightUint64x4(v) - case OpMaskedShiftRightUint64x8: - return rewriteValueAMD64_OpMaskedShiftRightUint64x8(v) - case OpMaskedSqrtFloat32x16: - return rewriteValueAMD64_OpMaskedSqrtFloat32x16(v) - case OpMaskedSqrtFloat32x4: - return rewriteValueAMD64_OpMaskedSqrtFloat32x4(v) - case OpMaskedSqrtFloat32x8: - return rewriteValueAMD64_OpMaskedSqrtFloat32x8(v) - case OpMaskedSqrtFloat64x2: - return rewriteValueAMD64_OpMaskedSqrtFloat64x2(v) - case OpMaskedSqrtFloat64x4: - return rewriteValueAMD64_OpMaskedSqrtFloat64x4(v) - case OpMaskedSqrtFloat64x8: - return rewriteValueAMD64_OpMaskedSqrtFloat64x8(v) - case OpMaskedSubFloat32x16: - return rewriteValueAMD64_OpMaskedSubFloat32x16(v) - case OpMaskedSubFloat32x4: - return rewriteValueAMD64_OpMaskedSubFloat32x4(v) - case OpMaskedSubFloat32x8: - return rewriteValueAMD64_OpMaskedSubFloat32x8(v) - case OpMaskedSubFloat64x2: - return rewriteValueAMD64_OpMaskedSubFloat64x2(v) - case OpMaskedSubFloat64x4: - return rewriteValueAMD64_OpMaskedSubFloat64x4(v) - case OpMaskedSubFloat64x8: - return rewriteValueAMD64_OpMaskedSubFloat64x8(v) - case OpMaskedSubInt16x16: - return rewriteValueAMD64_OpMaskedSubInt16x16(v) - case OpMaskedSubInt16x32: - return rewriteValueAMD64_OpMaskedSubInt16x32(v) - case OpMaskedSubInt16x8: - return rewriteValueAMD64_OpMaskedSubInt16x8(v) - case OpMaskedSubInt32x16: - return rewriteValueAMD64_OpMaskedSubInt32x16(v) - case OpMaskedSubInt32x4: - return rewriteValueAMD64_OpMaskedSubInt32x4(v) - case OpMaskedSubInt32x8: - return rewriteValueAMD64_OpMaskedSubInt32x8(v) - case OpMaskedSubInt64x2: - return rewriteValueAMD64_OpMaskedSubInt64x2(v) - case OpMaskedSubInt64x4: - return rewriteValueAMD64_OpMaskedSubInt64x4(v) - case OpMaskedSubInt64x8: - return rewriteValueAMD64_OpMaskedSubInt64x8(v) - case OpMaskedSubInt8x16: - return rewriteValueAMD64_OpMaskedSubInt8x16(v) - case OpMaskedSubInt8x32: - return rewriteValueAMD64_OpMaskedSubInt8x32(v) - case OpMaskedSubInt8x64: - return rewriteValueAMD64_OpMaskedSubInt8x64(v) - case OpMaskedSubUint16x16: - return 
rewriteValueAMD64_OpMaskedSubUint16x16(v) - case OpMaskedSubUint16x32: - return rewriteValueAMD64_OpMaskedSubUint16x32(v) - case OpMaskedSubUint16x8: - return rewriteValueAMD64_OpMaskedSubUint16x8(v) - case OpMaskedSubUint32x16: - return rewriteValueAMD64_OpMaskedSubUint32x16(v) - case OpMaskedSubUint32x4: - return rewriteValueAMD64_OpMaskedSubUint32x4(v) - case OpMaskedSubUint32x8: - return rewriteValueAMD64_OpMaskedSubUint32x8(v) - case OpMaskedSubUint64x2: - return rewriteValueAMD64_OpMaskedSubUint64x2(v) - case OpMaskedSubUint64x4: - return rewriteValueAMD64_OpMaskedSubUint64x4(v) - case OpMaskedSubUint64x8: - return rewriteValueAMD64_OpMaskedSubUint64x8(v) - case OpMaskedSubUint8x16: - return rewriteValueAMD64_OpMaskedSubUint8x16(v) - case OpMaskedSubUint8x32: - return rewriteValueAMD64_OpMaskedSubUint8x32(v) - case OpMaskedSubUint8x64: - return rewriteValueAMD64_OpMaskedSubUint8x64(v) - case OpMaskedTruncWithPrecisionFloat32x16: - return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x16(v) - case OpMaskedTruncWithPrecisionFloat32x4: - return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x4(v) - case OpMaskedTruncWithPrecisionFloat32x8: - return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x8(v) - case OpMaskedTruncWithPrecisionFloat64x2: - return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x2(v) - case OpMaskedTruncWithPrecisionFloat64x4: - return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x4(v) - case OpMaskedTruncWithPrecisionFloat64x8: - return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v) - case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16: - return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16(v) - case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4: - return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4(v) - case OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8: - return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8(v) - case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16: - return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16(v) - case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4: - return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4(v) - case OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8: - return rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8(v) - case OpMaskedXorInt32x16: - return rewriteValueAMD64_OpMaskedXorInt32x16(v) - case OpMaskedXorInt32x4: - return rewriteValueAMD64_OpMaskedXorInt32x4(v) - case OpMaskedXorInt32x8: - return rewriteValueAMD64_OpMaskedXorInt32x8(v) - case OpMaskedXorInt64x2: - return rewriteValueAMD64_OpMaskedXorInt64x2(v) - case OpMaskedXorInt64x4: - return rewriteValueAMD64_OpMaskedXorInt64x4(v) - case OpMaskedXorInt64x8: - return rewriteValueAMD64_OpMaskedXorInt64x8(v) - case OpMaskedXorUint32x16: - return rewriteValueAMD64_OpMaskedXorUint32x16(v) - case OpMaskedXorUint32x4: - return rewriteValueAMD64_OpMaskedXorUint32x4(v) - case OpMaskedXorUint32x8: - return rewriteValueAMD64_OpMaskedXorUint32x8(v) - case OpMaskedXorUint64x2: - return rewriteValueAMD64_OpMaskedXorUint64x2(v) - case OpMaskedXorUint64x4: - return rewriteValueAMD64_OpMaskedXorUint64x4(v) - case OpMaskedXorUint64x8: - return rewriteValueAMD64_OpMaskedXorUint64x8(v) case OpMax32F: return rewriteValueAMD64_OpMax32F(v) case OpMax64F: @@ -3345,6 +2433,66 @@ func rewriteValueAMD64(v *Value) bool { case OpMaxInt8x64: v.Op = OpAMD64VPMAXSB512 return true + case 
OpMaxMaskedFloat32x16: + return rewriteValueAMD64_OpMaxMaskedFloat32x16(v) + case OpMaxMaskedFloat32x4: + return rewriteValueAMD64_OpMaxMaskedFloat32x4(v) + case OpMaxMaskedFloat32x8: + return rewriteValueAMD64_OpMaxMaskedFloat32x8(v) + case OpMaxMaskedFloat64x2: + return rewriteValueAMD64_OpMaxMaskedFloat64x2(v) + case OpMaxMaskedFloat64x4: + return rewriteValueAMD64_OpMaxMaskedFloat64x4(v) + case OpMaxMaskedFloat64x8: + return rewriteValueAMD64_OpMaxMaskedFloat64x8(v) + case OpMaxMaskedInt16x16: + return rewriteValueAMD64_OpMaxMaskedInt16x16(v) + case OpMaxMaskedInt16x32: + return rewriteValueAMD64_OpMaxMaskedInt16x32(v) + case OpMaxMaskedInt16x8: + return rewriteValueAMD64_OpMaxMaskedInt16x8(v) + case OpMaxMaskedInt32x16: + return rewriteValueAMD64_OpMaxMaskedInt32x16(v) + case OpMaxMaskedInt32x4: + return rewriteValueAMD64_OpMaxMaskedInt32x4(v) + case OpMaxMaskedInt32x8: + return rewriteValueAMD64_OpMaxMaskedInt32x8(v) + case OpMaxMaskedInt64x2: + return rewriteValueAMD64_OpMaxMaskedInt64x2(v) + case OpMaxMaskedInt64x4: + return rewriteValueAMD64_OpMaxMaskedInt64x4(v) + case OpMaxMaskedInt64x8: + return rewriteValueAMD64_OpMaxMaskedInt64x8(v) + case OpMaxMaskedInt8x16: + return rewriteValueAMD64_OpMaxMaskedInt8x16(v) + case OpMaxMaskedInt8x32: + return rewriteValueAMD64_OpMaxMaskedInt8x32(v) + case OpMaxMaskedInt8x64: + return rewriteValueAMD64_OpMaxMaskedInt8x64(v) + case OpMaxMaskedUint16x16: + return rewriteValueAMD64_OpMaxMaskedUint16x16(v) + case OpMaxMaskedUint16x32: + return rewriteValueAMD64_OpMaxMaskedUint16x32(v) + case OpMaxMaskedUint16x8: + return rewriteValueAMD64_OpMaxMaskedUint16x8(v) + case OpMaxMaskedUint32x16: + return rewriteValueAMD64_OpMaxMaskedUint32x16(v) + case OpMaxMaskedUint32x4: + return rewriteValueAMD64_OpMaxMaskedUint32x4(v) + case OpMaxMaskedUint32x8: + return rewriteValueAMD64_OpMaxMaskedUint32x8(v) + case OpMaxMaskedUint64x2: + return rewriteValueAMD64_OpMaxMaskedUint64x2(v) + case OpMaxMaskedUint64x4: + return rewriteValueAMD64_OpMaxMaskedUint64x4(v) + case OpMaxMaskedUint64x8: + return rewriteValueAMD64_OpMaxMaskedUint64x8(v) + case OpMaxMaskedUint8x16: + return rewriteValueAMD64_OpMaxMaskedUint8x16(v) + case OpMaxMaskedUint8x32: + return rewriteValueAMD64_OpMaxMaskedUint8x32(v) + case OpMaxMaskedUint8x64: + return rewriteValueAMD64_OpMaxMaskedUint8x64(v) case OpMaxUint16x16: v.Op = OpAMD64VPMAXUW256 return true @@ -3439,6 +2587,66 @@ func rewriteValueAMD64(v *Value) bool { case OpMinInt8x64: v.Op = OpAMD64VPMINSB512 return true + case OpMinMaskedFloat32x16: + return rewriteValueAMD64_OpMinMaskedFloat32x16(v) + case OpMinMaskedFloat32x4: + return rewriteValueAMD64_OpMinMaskedFloat32x4(v) + case OpMinMaskedFloat32x8: + return rewriteValueAMD64_OpMinMaskedFloat32x8(v) + case OpMinMaskedFloat64x2: + return rewriteValueAMD64_OpMinMaskedFloat64x2(v) + case OpMinMaskedFloat64x4: + return rewriteValueAMD64_OpMinMaskedFloat64x4(v) + case OpMinMaskedFloat64x8: + return rewriteValueAMD64_OpMinMaskedFloat64x8(v) + case OpMinMaskedInt16x16: + return rewriteValueAMD64_OpMinMaskedInt16x16(v) + case OpMinMaskedInt16x32: + return rewriteValueAMD64_OpMinMaskedInt16x32(v) + case OpMinMaskedInt16x8: + return rewriteValueAMD64_OpMinMaskedInt16x8(v) + case OpMinMaskedInt32x16: + return rewriteValueAMD64_OpMinMaskedInt32x16(v) + case OpMinMaskedInt32x4: + return rewriteValueAMD64_OpMinMaskedInt32x4(v) + case OpMinMaskedInt32x8: + return rewriteValueAMD64_OpMinMaskedInt32x8(v) + case OpMinMaskedInt64x2: + return rewriteValueAMD64_OpMinMaskedInt64x2(v) + case 
OpMinMaskedInt64x4: + return rewriteValueAMD64_OpMinMaskedInt64x4(v) + case OpMinMaskedInt64x8: + return rewriteValueAMD64_OpMinMaskedInt64x8(v) + case OpMinMaskedInt8x16: + return rewriteValueAMD64_OpMinMaskedInt8x16(v) + case OpMinMaskedInt8x32: + return rewriteValueAMD64_OpMinMaskedInt8x32(v) + case OpMinMaskedInt8x64: + return rewriteValueAMD64_OpMinMaskedInt8x64(v) + case OpMinMaskedUint16x16: + return rewriteValueAMD64_OpMinMaskedUint16x16(v) + case OpMinMaskedUint16x32: + return rewriteValueAMD64_OpMinMaskedUint16x32(v) + case OpMinMaskedUint16x8: + return rewriteValueAMD64_OpMinMaskedUint16x8(v) + case OpMinMaskedUint32x16: + return rewriteValueAMD64_OpMinMaskedUint32x16(v) + case OpMinMaskedUint32x4: + return rewriteValueAMD64_OpMinMaskedUint32x4(v) + case OpMinMaskedUint32x8: + return rewriteValueAMD64_OpMinMaskedUint32x8(v) + case OpMinMaskedUint64x2: + return rewriteValueAMD64_OpMinMaskedUint64x2(v) + case OpMinMaskedUint64x4: + return rewriteValueAMD64_OpMinMaskedUint64x4(v) + case OpMinMaskedUint64x8: + return rewriteValueAMD64_OpMinMaskedUint64x8(v) + case OpMinMaskedUint8x16: + return rewriteValueAMD64_OpMinMaskedUint8x16(v) + case OpMinMaskedUint8x32: + return rewriteValueAMD64_OpMinMaskedUint8x32(v) + case OpMinMaskedUint8x64: + return rewriteValueAMD64_OpMinMaskedUint8x64(v) case OpMinUint16x16: v.Op = OpAMD64VPMINUW256 return true @@ -3532,6 +2740,18 @@ func rewriteValueAMD64(v *Value) bool { case OpMulByPowOf2Float64x8: v.Op = OpAMD64VSCALEFPD512 return true + case OpMulByPowOf2MaskedFloat32x16: + return rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x16(v) + case OpMulByPowOf2MaskedFloat32x4: + return rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x4(v) + case OpMulByPowOf2MaskedFloat32x8: + return rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x8(v) + case OpMulByPowOf2MaskedFloat64x2: + return rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x2(v) + case OpMulByPowOf2MaskedFloat64x4: + return rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x4(v) + case OpMulByPowOf2MaskedFloat64x8: + return rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x8(v) case OpMulEvenWidenInt32x4: v.Op = OpAMD64VPMULDQ128 return true @@ -3547,6 +2767,18 @@ func rewriteValueAMD64(v *Value) bool { case OpMulEvenWidenInt64x8: v.Op = OpAMD64VPMULDQ512 return true + case OpMulEvenWidenMaskedInt64x2: + return rewriteValueAMD64_OpMulEvenWidenMaskedInt64x2(v) + case OpMulEvenWidenMaskedInt64x4: + return rewriteValueAMD64_OpMulEvenWidenMaskedInt64x4(v) + case OpMulEvenWidenMaskedInt64x8: + return rewriteValueAMD64_OpMulEvenWidenMaskedInt64x8(v) + case OpMulEvenWidenMaskedUint64x2: + return rewriteValueAMD64_OpMulEvenWidenMaskedUint64x2(v) + case OpMulEvenWidenMaskedUint64x4: + return rewriteValueAMD64_OpMulEvenWidenMaskedUint64x4(v) + case OpMulEvenWidenMaskedUint64x8: + return rewriteValueAMD64_OpMulEvenWidenMaskedUint64x8(v) case OpMulEvenWidenUint32x4: v.Op = OpAMD64VPMULUDQ128 return true @@ -3589,6 +2821,18 @@ func rewriteValueAMD64(v *Value) bool { case OpMulHighInt16x8: v.Op = OpAMD64VPMULHW128 return true + case OpMulHighMaskedInt16x16: + return rewriteValueAMD64_OpMulHighMaskedInt16x16(v) + case OpMulHighMaskedInt16x32: + return rewriteValueAMD64_OpMulHighMaskedInt16x32(v) + case OpMulHighMaskedInt16x8: + return rewriteValueAMD64_OpMulHighMaskedInt16x8(v) + case OpMulHighMaskedUint16x16: + return rewriteValueAMD64_OpMulHighMaskedUint16x16(v) + case OpMulHighMaskedUint16x32: + return rewriteValueAMD64_OpMulHighMaskedUint16x32(v) + case OpMulHighMaskedUint16x8: + return rewriteValueAMD64_OpMulHighMaskedUint16x8(v) 
case OpMulHighUint16x16: v.Op = OpAMD64VPMULHUW256 return true @@ -3625,6 +2869,36 @@ func rewriteValueAMD64(v *Value) bool { case OpMulLowInt64x8: v.Op = OpAMD64VPMULLQ512 return true + case OpMulLowMaskedInt16x16: + return rewriteValueAMD64_OpMulLowMaskedInt16x16(v) + case OpMulLowMaskedInt16x32: + return rewriteValueAMD64_OpMulLowMaskedInt16x32(v) + case OpMulLowMaskedInt16x8: + return rewriteValueAMD64_OpMulLowMaskedInt16x8(v) + case OpMulLowMaskedInt32x16: + return rewriteValueAMD64_OpMulLowMaskedInt32x16(v) + case OpMulLowMaskedInt32x4: + return rewriteValueAMD64_OpMulLowMaskedInt32x4(v) + case OpMulLowMaskedInt32x8: + return rewriteValueAMD64_OpMulLowMaskedInt32x8(v) + case OpMulLowMaskedInt64x2: + return rewriteValueAMD64_OpMulLowMaskedInt64x2(v) + case OpMulLowMaskedInt64x4: + return rewriteValueAMD64_OpMulLowMaskedInt64x4(v) + case OpMulLowMaskedInt64x8: + return rewriteValueAMD64_OpMulLowMaskedInt64x8(v) + case OpMulMaskedFloat32x16: + return rewriteValueAMD64_OpMulMaskedFloat32x16(v) + case OpMulMaskedFloat32x4: + return rewriteValueAMD64_OpMulMaskedFloat32x4(v) + case OpMulMaskedFloat32x8: + return rewriteValueAMD64_OpMulMaskedFloat32x8(v) + case OpMulMaskedFloat64x2: + return rewriteValueAMD64_OpMulMaskedFloat64x2(v) + case OpMulMaskedFloat64x4: + return rewriteValueAMD64_OpMulMaskedFloat64x4(v) + case OpMulMaskedFloat64x8: + return rewriteValueAMD64_OpMulMaskedFloat64x8(v) case OpNeg16: v.Op = OpAMD64NEGL return true @@ -3698,6 +2972,66 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpNotEqualInt8x32(v) case OpNotEqualInt8x64: return rewriteValueAMD64_OpNotEqualInt8x64(v) + case OpNotEqualMaskedFloat32x16: + return rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v) + case OpNotEqualMaskedFloat32x4: + return rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v) + case OpNotEqualMaskedFloat32x8: + return rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v) + case OpNotEqualMaskedFloat64x2: + return rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v) + case OpNotEqualMaskedFloat64x4: + return rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v) + case OpNotEqualMaskedFloat64x8: + return rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v) + case OpNotEqualMaskedInt16x16: + return rewriteValueAMD64_OpNotEqualMaskedInt16x16(v) + case OpNotEqualMaskedInt16x32: + return rewriteValueAMD64_OpNotEqualMaskedInt16x32(v) + case OpNotEqualMaskedInt16x8: + return rewriteValueAMD64_OpNotEqualMaskedInt16x8(v) + case OpNotEqualMaskedInt32x16: + return rewriteValueAMD64_OpNotEqualMaskedInt32x16(v) + case OpNotEqualMaskedInt32x4: + return rewriteValueAMD64_OpNotEqualMaskedInt32x4(v) + case OpNotEqualMaskedInt32x8: + return rewriteValueAMD64_OpNotEqualMaskedInt32x8(v) + case OpNotEqualMaskedInt64x2: + return rewriteValueAMD64_OpNotEqualMaskedInt64x2(v) + case OpNotEqualMaskedInt64x4: + return rewriteValueAMD64_OpNotEqualMaskedInt64x4(v) + case OpNotEqualMaskedInt64x8: + return rewriteValueAMD64_OpNotEqualMaskedInt64x8(v) + case OpNotEqualMaskedInt8x16: + return rewriteValueAMD64_OpNotEqualMaskedInt8x16(v) + case OpNotEqualMaskedInt8x32: + return rewriteValueAMD64_OpNotEqualMaskedInt8x32(v) + case OpNotEqualMaskedInt8x64: + return rewriteValueAMD64_OpNotEqualMaskedInt8x64(v) + case OpNotEqualMaskedUint16x16: + return rewriteValueAMD64_OpNotEqualMaskedUint16x16(v) + case OpNotEqualMaskedUint16x32: + return rewriteValueAMD64_OpNotEqualMaskedUint16x32(v) + case OpNotEqualMaskedUint16x8: + return rewriteValueAMD64_OpNotEqualMaskedUint16x8(v) + case OpNotEqualMaskedUint32x16: + return 
rewriteValueAMD64_OpNotEqualMaskedUint32x16(v) + case OpNotEqualMaskedUint32x4: + return rewriteValueAMD64_OpNotEqualMaskedUint32x4(v) + case OpNotEqualMaskedUint32x8: + return rewriteValueAMD64_OpNotEqualMaskedUint32x8(v) + case OpNotEqualMaskedUint64x2: + return rewriteValueAMD64_OpNotEqualMaskedUint64x2(v) + case OpNotEqualMaskedUint64x4: + return rewriteValueAMD64_OpNotEqualMaskedUint64x4(v) + case OpNotEqualMaskedUint64x8: + return rewriteValueAMD64_OpNotEqualMaskedUint64x8(v) + case OpNotEqualMaskedUint8x16: + return rewriteValueAMD64_OpNotEqualMaskedUint8x16(v) + case OpNotEqualMaskedUint8x32: + return rewriteValueAMD64_OpNotEqualMaskedUint8x32(v) + case OpNotEqualMaskedUint8x64: + return rewriteValueAMD64_OpNotEqualMaskedUint8x64(v) case OpNotEqualUint16x16: return rewriteValueAMD64_OpNotEqualUint16x16(v) case OpNotEqualUint16x32: @@ -3769,6 +3103,30 @@ func rewriteValueAMD64(v *Value) bool { case OpOrInt8x32: v.Op = OpAMD64VPOR256 return true + case OpOrMaskedInt32x16: + return rewriteValueAMD64_OpOrMaskedInt32x16(v) + case OpOrMaskedInt32x4: + return rewriteValueAMD64_OpOrMaskedInt32x4(v) + case OpOrMaskedInt32x8: + return rewriteValueAMD64_OpOrMaskedInt32x8(v) + case OpOrMaskedInt64x2: + return rewriteValueAMD64_OpOrMaskedInt64x2(v) + case OpOrMaskedInt64x4: + return rewriteValueAMD64_OpOrMaskedInt64x4(v) + case OpOrMaskedInt64x8: + return rewriteValueAMD64_OpOrMaskedInt64x8(v) + case OpOrMaskedUint32x16: + return rewriteValueAMD64_OpOrMaskedUint32x16(v) + case OpOrMaskedUint32x4: + return rewriteValueAMD64_OpOrMaskedUint32x4(v) + case OpOrMaskedUint32x8: + return rewriteValueAMD64_OpOrMaskedUint32x8(v) + case OpOrMaskedUint64x2: + return rewriteValueAMD64_OpOrMaskedUint64x2(v) + case OpOrMaskedUint64x4: + return rewriteValueAMD64_OpOrMaskedUint64x4(v) + case OpOrMaskedUint64x8: + return rewriteValueAMD64_OpOrMaskedUint64x8(v) case OpOrUint16x16: v.Op = OpAMD64VPOR256 return true @@ -3808,6 +3166,12 @@ func rewriteValueAMD64(v *Value) bool { case OpPairDotProdAccumulateInt32x8: v.Op = OpAMD64VPDPWSSD256 return true + case OpPairDotProdAccumulateMaskedInt32x16: + return rewriteValueAMD64_OpPairDotProdAccumulateMaskedInt32x16(v) + case OpPairDotProdAccumulateMaskedInt32x4: + return rewriteValueAMD64_OpPairDotProdAccumulateMaskedInt32x4(v) + case OpPairDotProdAccumulateMaskedInt32x8: + return rewriteValueAMD64_OpPairDotProdAccumulateMaskedInt32x8(v) case OpPairDotProdInt16x16: v.Op = OpAMD64VPMADDWD256 return true @@ -3817,6 +3181,12 @@ func rewriteValueAMD64(v *Value) bool { case OpPairDotProdInt16x8: v.Op = OpAMD64VPMADDWD128 return true + case OpPairDotProdMaskedInt16x16: + return rewriteValueAMD64_OpPairDotProdMaskedInt16x16(v) + case OpPairDotProdMaskedInt16x32: + return rewriteValueAMD64_OpPairDotProdMaskedInt16x32(v) + case OpPairDotProdMaskedInt16x8: + return rewriteValueAMD64_OpPairDotProdMaskedInt16x8(v) case OpPairwiseAddFloat32x4: v.Op = OpAMD64VHADDPS128 return true @@ -3937,6 +3307,54 @@ func rewriteValueAMD64(v *Value) bool { case OpPopCountInt8x64: v.Op = OpAMD64VPOPCNTB512 return true + case OpPopCountMaskedInt16x16: + return rewriteValueAMD64_OpPopCountMaskedInt16x16(v) + case OpPopCountMaskedInt16x32: + return rewriteValueAMD64_OpPopCountMaskedInt16x32(v) + case OpPopCountMaskedInt16x8: + return rewriteValueAMD64_OpPopCountMaskedInt16x8(v) + case OpPopCountMaskedInt32x16: + return rewriteValueAMD64_OpPopCountMaskedInt32x16(v) + case OpPopCountMaskedInt32x4: + return rewriteValueAMD64_OpPopCountMaskedInt32x4(v) + case OpPopCountMaskedInt32x8: + return 
rewriteValueAMD64_OpPopCountMaskedInt32x8(v) + case OpPopCountMaskedInt64x2: + return rewriteValueAMD64_OpPopCountMaskedInt64x2(v) + case OpPopCountMaskedInt64x4: + return rewriteValueAMD64_OpPopCountMaskedInt64x4(v) + case OpPopCountMaskedInt64x8: + return rewriteValueAMD64_OpPopCountMaskedInt64x8(v) + case OpPopCountMaskedInt8x16: + return rewriteValueAMD64_OpPopCountMaskedInt8x16(v) + case OpPopCountMaskedInt8x32: + return rewriteValueAMD64_OpPopCountMaskedInt8x32(v) + case OpPopCountMaskedInt8x64: + return rewriteValueAMD64_OpPopCountMaskedInt8x64(v) + case OpPopCountMaskedUint16x16: + return rewriteValueAMD64_OpPopCountMaskedUint16x16(v) + case OpPopCountMaskedUint16x32: + return rewriteValueAMD64_OpPopCountMaskedUint16x32(v) + case OpPopCountMaskedUint16x8: + return rewriteValueAMD64_OpPopCountMaskedUint16x8(v) + case OpPopCountMaskedUint32x16: + return rewriteValueAMD64_OpPopCountMaskedUint32x16(v) + case OpPopCountMaskedUint32x4: + return rewriteValueAMD64_OpPopCountMaskedUint32x4(v) + case OpPopCountMaskedUint32x8: + return rewriteValueAMD64_OpPopCountMaskedUint32x8(v) + case OpPopCountMaskedUint64x2: + return rewriteValueAMD64_OpPopCountMaskedUint64x2(v) + case OpPopCountMaskedUint64x4: + return rewriteValueAMD64_OpPopCountMaskedUint64x4(v) + case OpPopCountMaskedUint64x8: + return rewriteValueAMD64_OpPopCountMaskedUint64x8(v) + case OpPopCountMaskedUint8x16: + return rewriteValueAMD64_OpPopCountMaskedUint8x16(v) + case OpPopCountMaskedUint8x32: + return rewriteValueAMD64_OpPopCountMaskedUint8x32(v) + case OpPopCountMaskedUint8x64: + return rewriteValueAMD64_OpPopCountMaskedUint8x64(v) case OpPopCountUint16x16: v.Op = OpAMD64VPOPCNTW256 return true @@ -3991,6 +3409,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpRotateAllLeftInt64x4(v) case OpRotateAllLeftInt64x8: return rewriteValueAMD64_OpRotateAllLeftInt64x8(v) + case OpRotateAllLeftMaskedInt32x16: + return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v) + case OpRotateAllLeftMaskedInt32x4: + return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v) + case OpRotateAllLeftMaskedInt32x8: + return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v) + case OpRotateAllLeftMaskedInt64x2: + return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v) + case OpRotateAllLeftMaskedInt64x4: + return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v) + case OpRotateAllLeftMaskedInt64x8: + return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v) + case OpRotateAllLeftMaskedUint32x16: + return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v) + case OpRotateAllLeftMaskedUint32x4: + return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v) + case OpRotateAllLeftMaskedUint32x8: + return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v) + case OpRotateAllLeftMaskedUint64x2: + return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v) + case OpRotateAllLeftMaskedUint64x4: + return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v) + case OpRotateAllLeftMaskedUint64x8: + return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v) case OpRotateAllLeftUint32x16: return rewriteValueAMD64_OpRotateAllLeftUint32x16(v) case OpRotateAllLeftUint32x4: @@ -4015,6 +3457,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpRotateAllRightInt64x4(v) case OpRotateAllRightInt64x8: return rewriteValueAMD64_OpRotateAllRightInt64x8(v) + case OpRotateAllRightMaskedInt32x16: + return rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v) + case OpRotateAllRightMaskedInt32x4: + return 
rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v) + case OpRotateAllRightMaskedInt32x8: + return rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v) + case OpRotateAllRightMaskedInt64x2: + return rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v) + case OpRotateAllRightMaskedInt64x4: + return rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v) + case OpRotateAllRightMaskedInt64x8: + return rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v) + case OpRotateAllRightMaskedUint32x16: + return rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v) + case OpRotateAllRightMaskedUint32x4: + return rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v) + case OpRotateAllRightMaskedUint32x8: + return rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v) + case OpRotateAllRightMaskedUint64x2: + return rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v) + case OpRotateAllRightMaskedUint64x4: + return rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v) + case OpRotateAllRightMaskedUint64x8: + return rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v) case OpRotateAllRightUint32x16: return rewriteValueAMD64_OpRotateAllRightUint32x16(v) case OpRotateAllRightUint32x4: @@ -4057,6 +3523,30 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateLeftInt64x8: v.Op = OpAMD64VPROLVQ512 return true + case OpRotateLeftMaskedInt32x16: + return rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v) + case OpRotateLeftMaskedInt32x4: + return rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v) + case OpRotateLeftMaskedInt32x8: + return rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v) + case OpRotateLeftMaskedInt64x2: + return rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v) + case OpRotateLeftMaskedInt64x4: + return rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v) + case OpRotateLeftMaskedInt64x8: + return rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v) + case OpRotateLeftMaskedUint32x16: + return rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v) + case OpRotateLeftMaskedUint32x4: + return rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v) + case OpRotateLeftMaskedUint32x8: + return rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v) + case OpRotateLeftMaskedUint64x2: + return rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v) + case OpRotateLeftMaskedUint64x4: + return rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v) + case OpRotateLeftMaskedUint64x8: + return rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v) case OpRotateLeftUint32x16: v.Op = OpAMD64VPROLVD512 return true @@ -4093,6 +3583,30 @@ func rewriteValueAMD64(v *Value) bool { case OpRotateRightInt64x8: v.Op = OpAMD64VPRORVQ512 return true + case OpRotateRightMaskedInt32x16: + return rewriteValueAMD64_OpRotateRightMaskedInt32x16(v) + case OpRotateRightMaskedInt32x4: + return rewriteValueAMD64_OpRotateRightMaskedInt32x4(v) + case OpRotateRightMaskedInt32x8: + return rewriteValueAMD64_OpRotateRightMaskedInt32x8(v) + case OpRotateRightMaskedInt64x2: + return rewriteValueAMD64_OpRotateRightMaskedInt64x2(v) + case OpRotateRightMaskedInt64x4: + return rewriteValueAMD64_OpRotateRightMaskedInt64x4(v) + case OpRotateRightMaskedInt64x8: + return rewriteValueAMD64_OpRotateRightMaskedInt64x8(v) + case OpRotateRightMaskedUint32x16: + return rewriteValueAMD64_OpRotateRightMaskedUint32x16(v) + case OpRotateRightMaskedUint32x4: + return rewriteValueAMD64_OpRotateRightMaskedUint32x4(v) + case OpRotateRightMaskedUint32x8: + return rewriteValueAMD64_OpRotateRightMaskedUint32x8(v) + case OpRotateRightMaskedUint64x2: + return rewriteValueAMD64_OpRotateRightMaskedUint64x2(v) + case OpRotateRightMaskedUint64x4: + return 
rewriteValueAMD64_OpRotateRightMaskedUint64x4(v) + case OpRotateRightMaskedUint64x8: + return rewriteValueAMD64_OpRotateRightMaskedUint64x8(v) case OpRotateRightUint32x16: v.Op = OpAMD64VPRORVD512 return true @@ -4139,6 +3653,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v) case OpRoundWithPrecisionFloat64x8: return rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v) + case OpRoundWithPrecisionMaskedFloat32x16: + return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x16(v) + case OpRoundWithPrecisionMaskedFloat32x4: + return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x4(v) + case OpRoundWithPrecisionMaskedFloat32x8: + return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x8(v) + case OpRoundWithPrecisionMaskedFloat64x2: + return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x2(v) + case OpRoundWithPrecisionMaskedFloat64x4: + return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x4(v) + case OpRoundWithPrecisionMaskedFloat64x8: + return rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x8(v) case OpRsh16Ux16: return rewriteValueAMD64_OpRsh16Ux16(v) case OpRsh16Ux32: @@ -4221,6 +3747,30 @@ func rewriteValueAMD64(v *Value) bool { case OpSaturatedAddInt8x64: v.Op = OpAMD64VPADDSB512 return true + case OpSaturatedAddMaskedInt16x16: + return rewriteValueAMD64_OpSaturatedAddMaskedInt16x16(v) + case OpSaturatedAddMaskedInt16x32: + return rewriteValueAMD64_OpSaturatedAddMaskedInt16x32(v) + case OpSaturatedAddMaskedInt16x8: + return rewriteValueAMD64_OpSaturatedAddMaskedInt16x8(v) + case OpSaturatedAddMaskedInt8x16: + return rewriteValueAMD64_OpSaturatedAddMaskedInt8x16(v) + case OpSaturatedAddMaskedInt8x32: + return rewriteValueAMD64_OpSaturatedAddMaskedInt8x32(v) + case OpSaturatedAddMaskedInt8x64: + return rewriteValueAMD64_OpSaturatedAddMaskedInt8x64(v) + case OpSaturatedAddMaskedUint16x16: + return rewriteValueAMD64_OpSaturatedAddMaskedUint16x16(v) + case OpSaturatedAddMaskedUint16x32: + return rewriteValueAMD64_OpSaturatedAddMaskedUint16x32(v) + case OpSaturatedAddMaskedUint16x8: + return rewriteValueAMD64_OpSaturatedAddMaskedUint16x8(v) + case OpSaturatedAddMaskedUint8x16: + return rewriteValueAMD64_OpSaturatedAddMaskedUint8x16(v) + case OpSaturatedAddMaskedUint8x32: + return rewriteValueAMD64_OpSaturatedAddMaskedUint8x32(v) + case OpSaturatedAddMaskedUint8x64: + return rewriteValueAMD64_OpSaturatedAddMaskedUint8x64(v) case OpSaturatedAddUint16x16: v.Op = OpAMD64VPADDSW256 return true @@ -4248,6 +3798,12 @@ func rewriteValueAMD64(v *Value) bool { case OpSaturatedPairDotProdAccumulateInt32x8: v.Op = OpAMD64VPDPWSSDS256 return true + case OpSaturatedPairDotProdAccumulateMaskedInt32x16: + return rewriteValueAMD64_OpSaturatedPairDotProdAccumulateMaskedInt32x16(v) + case OpSaturatedPairDotProdAccumulateMaskedInt32x4: + return rewriteValueAMD64_OpSaturatedPairDotProdAccumulateMaskedInt32x4(v) + case OpSaturatedPairDotProdAccumulateMaskedInt32x8: + return rewriteValueAMD64_OpSaturatedPairDotProdAccumulateMaskedInt32x8(v) case OpSaturatedPairwiseAddInt16x16: v.Op = OpAMD64VPHADDSW256 return true @@ -4278,6 +3834,30 @@ func rewriteValueAMD64(v *Value) bool { case OpSaturatedSubInt8x64: v.Op = OpAMD64VPSUBSB512 return true + case OpSaturatedSubMaskedInt16x16: + return rewriteValueAMD64_OpSaturatedSubMaskedInt16x16(v) + case OpSaturatedSubMaskedInt16x32: + return rewriteValueAMD64_OpSaturatedSubMaskedInt16x32(v) + case OpSaturatedSubMaskedInt16x8: + return rewriteValueAMD64_OpSaturatedSubMaskedInt16x8(v) + case 
OpSaturatedSubMaskedInt8x16: + return rewriteValueAMD64_OpSaturatedSubMaskedInt8x16(v) + case OpSaturatedSubMaskedInt8x32: + return rewriteValueAMD64_OpSaturatedSubMaskedInt8x32(v) + case OpSaturatedSubMaskedInt8x64: + return rewriteValueAMD64_OpSaturatedSubMaskedInt8x64(v) + case OpSaturatedSubMaskedUint16x16: + return rewriteValueAMD64_OpSaturatedSubMaskedUint16x16(v) + case OpSaturatedSubMaskedUint16x32: + return rewriteValueAMD64_OpSaturatedSubMaskedUint16x32(v) + case OpSaturatedSubMaskedUint16x8: + return rewriteValueAMD64_OpSaturatedSubMaskedUint16x8(v) + case OpSaturatedSubMaskedUint8x16: + return rewriteValueAMD64_OpSaturatedSubMaskedUint8x16(v) + case OpSaturatedSubMaskedUint8x32: + return rewriteValueAMD64_OpSaturatedSubMaskedUint8x32(v) + case OpSaturatedSubMaskedUint8x64: + return rewriteValueAMD64_OpSaturatedSubMaskedUint8x64(v) case OpSaturatedSubUint16x16: v.Op = OpAMD64VPSUBSW256 return true @@ -4296,6 +3876,12 @@ func rewriteValueAMD64(v *Value) bool { case OpSaturatedSubUint8x64: v.Op = OpAMD64VPSUBSB512 return true + case OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16: + return rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16(v) + case OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32: + return rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32(v) + case OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64: + return rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64(v) case OpSaturatedUnsignedSignedPairDotProdUint8x16: v.Op = OpAMD64VPMADDUBSW128 return true @@ -4314,6 +3900,18 @@ func rewriteValueAMD64(v *Value) bool { case OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8: v.Op = OpAMD64VPDPBUSDS256 return true + case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16: + return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v) + case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4: + return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v) + case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8: + return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v) + case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16: + return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v) + case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4: + return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v) + case OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8: + return rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v) case OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16: v.Op = OpAMD64VPDPBUSDS512 return true @@ -4383,6 +3981,42 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x4(v) case OpShiftAllLeftAndFillUpperFromInt64x8: return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x8(v) + case OpShiftAllLeftAndFillUpperFromMaskedInt16x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x16(v) + case OpShiftAllLeftAndFillUpperFromMaskedInt16x32: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x32(v) + case OpShiftAllLeftAndFillUpperFromMaskedInt16x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x8(v) + case OpShiftAllLeftAndFillUpperFromMaskedInt32x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x16(v) + case 
OpShiftAllLeftAndFillUpperFromMaskedInt32x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x4(v) + case OpShiftAllLeftAndFillUpperFromMaskedInt32x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x8(v) + case OpShiftAllLeftAndFillUpperFromMaskedInt64x2: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x2(v) + case OpShiftAllLeftAndFillUpperFromMaskedInt64x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x4(v) + case OpShiftAllLeftAndFillUpperFromMaskedInt64x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x8(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint16x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x16(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint16x32: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x32(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint16x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x8(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint32x16: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x16(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint32x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x4(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint32x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x8(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint64x2: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x2(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint64x4: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x4(v) + case OpShiftAllLeftAndFillUpperFromMaskedUint64x8: + return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x8(v) case OpShiftAllLeftAndFillUpperFromUint16x16: return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x16(v) case OpShiftAllLeftAndFillUpperFromUint16x32: @@ -4422,6 +4056,18 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftInt64x8: v.Op = OpAMD64VPSLLQ512 return true + case OpShiftAllLeftMaskedInt64x2: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v) + case OpShiftAllLeftMaskedInt64x4: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v) + case OpShiftAllLeftMaskedInt64x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v) + case OpShiftAllLeftMaskedUint64x2: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v) + case OpShiftAllLeftMaskedUint64x4: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v) + case OpShiftAllLeftMaskedUint64x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v) case OpShiftAllLeftUint16x16: v.Op = OpAMD64VPSLLW256 return true @@ -4461,6 +4107,42 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x4(v) case OpShiftAllRightAndFillUpperFromInt64x8: return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x8(v) + case OpShiftAllRightAndFillUpperFromMaskedInt16x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x16(v) + case OpShiftAllRightAndFillUpperFromMaskedInt16x32: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x32(v) + case OpShiftAllRightAndFillUpperFromMaskedInt16x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x8(v) + case OpShiftAllRightAndFillUpperFromMaskedInt32x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x16(v) + case OpShiftAllRightAndFillUpperFromMaskedInt32x4: + return 
rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x4(v) + case OpShiftAllRightAndFillUpperFromMaskedInt32x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x8(v) + case OpShiftAllRightAndFillUpperFromMaskedInt64x2: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x2(v) + case OpShiftAllRightAndFillUpperFromMaskedInt64x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x4(v) + case OpShiftAllRightAndFillUpperFromMaskedInt64x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x8(v) + case OpShiftAllRightAndFillUpperFromMaskedUint16x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x16(v) + case OpShiftAllRightAndFillUpperFromMaskedUint16x32: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x32(v) + case OpShiftAllRightAndFillUpperFromMaskedUint16x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x8(v) + case OpShiftAllRightAndFillUpperFromMaskedUint32x16: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x16(v) + case OpShiftAllRightAndFillUpperFromMaskedUint32x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x4(v) + case OpShiftAllRightAndFillUpperFromMaskedUint32x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x8(v) + case OpShiftAllRightAndFillUpperFromMaskedUint64x2: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x2(v) + case OpShiftAllRightAndFillUpperFromMaskedUint64x4: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x4(v) + case OpShiftAllRightAndFillUpperFromMaskedUint64x8: + return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x8(v) case OpShiftAllRightAndFillUpperFromUint16x16: return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x16(v) case OpShiftAllRightAndFillUpperFromUint16x32: @@ -4500,6 +4182,18 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllRightInt64x8: v.Op = OpAMD64VPSRLQ512 return true + case OpShiftAllRightMaskedInt64x2: + return rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v) + case OpShiftAllRightMaskedInt64x4: + return rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v) + case OpShiftAllRightMaskedInt64x8: + return rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v) + case OpShiftAllRightMaskedUint64x2: + return rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v) + case OpShiftAllRightMaskedUint64x4: + return rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v) + case OpShiftAllRightMaskedUint64x8: + return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v) case OpShiftAllRightSignExtendedInt16x16: v.Op = OpAMD64VPSRAW256 return true @@ -4521,6 +4215,12 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllRightSignExtendedInt64x8: v.Op = OpAMD64VPSRAQ512 return true + case OpShiftAllRightSignExtendedMaskedInt64x2: + return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x2(v) + case OpShiftAllRightSignExtendedMaskedInt64x4: + return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x4(v) + case OpShiftAllRightSignExtendedMaskedInt64x8: + return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x8(v) case OpShiftAllRightUint16x16: v.Op = OpAMD64VPSRLW256 return true @@ -4569,6 +4269,42 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftLeftAndFillUpperFromInt64x8: v.Op = OpAMD64VPSHLDVQ512 return true + case OpShiftLeftAndFillUpperFromMaskedInt16x16: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x16(v) + case 
OpShiftLeftAndFillUpperFromMaskedInt16x32: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x32(v) + case OpShiftLeftAndFillUpperFromMaskedInt16x8: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x8(v) + case OpShiftLeftAndFillUpperFromMaskedInt32x16: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x16(v) + case OpShiftLeftAndFillUpperFromMaskedInt32x4: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x4(v) + case OpShiftLeftAndFillUpperFromMaskedInt32x8: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x8(v) + case OpShiftLeftAndFillUpperFromMaskedInt64x2: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x2(v) + case OpShiftLeftAndFillUpperFromMaskedInt64x4: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x4(v) + case OpShiftLeftAndFillUpperFromMaskedInt64x8: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x8(v) + case OpShiftLeftAndFillUpperFromMaskedUint16x16: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x16(v) + case OpShiftLeftAndFillUpperFromMaskedUint16x32: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x32(v) + case OpShiftLeftAndFillUpperFromMaskedUint16x8: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x8(v) + case OpShiftLeftAndFillUpperFromMaskedUint32x16: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x16(v) + case OpShiftLeftAndFillUpperFromMaskedUint32x4: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x4(v) + case OpShiftLeftAndFillUpperFromMaskedUint32x8: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x8(v) + case OpShiftLeftAndFillUpperFromMaskedUint64x2: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x2(v) + case OpShiftLeftAndFillUpperFromMaskedUint64x4: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x4(v) + case OpShiftLeftAndFillUpperFromMaskedUint64x8: + return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x8(v) case OpShiftLeftAndFillUpperFromUint16x16: v.Op = OpAMD64VPSHLDVW256 return true @@ -4623,6 +4359,42 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftLeftInt64x8: v.Op = OpAMD64VPSLLVQ512 return true + case OpShiftLeftMaskedInt16x16: + return rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v) + case OpShiftLeftMaskedInt16x32: + return rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v) + case OpShiftLeftMaskedInt16x8: + return rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v) + case OpShiftLeftMaskedInt32x16: + return rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v) + case OpShiftLeftMaskedInt32x4: + return rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v) + case OpShiftLeftMaskedInt32x8: + return rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v) + case OpShiftLeftMaskedInt64x2: + return rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v) + case OpShiftLeftMaskedInt64x4: + return rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v) + case OpShiftLeftMaskedInt64x8: + return rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v) + case OpShiftLeftMaskedUint16x16: + return rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v) + case OpShiftLeftMaskedUint16x32: + return rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v) + case OpShiftLeftMaskedUint16x8: + return rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v) + case OpShiftLeftMaskedUint32x16: + return rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v) + case OpShiftLeftMaskedUint32x4: + return rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v) + case OpShiftLeftMaskedUint32x8: 
+ return rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v) + case OpShiftLeftMaskedUint64x2: + return rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v) + case OpShiftLeftMaskedUint64x4: + return rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v) + case OpShiftLeftMaskedUint64x8: + return rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v) case OpShiftLeftUint16x16: v.Op = OpAMD64VPSLLVW256 return true @@ -4677,6 +4449,42 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftRightAndFillUpperFromInt64x8: v.Op = OpAMD64VPSHRDVQ512 return true + case OpShiftRightAndFillUpperFromMaskedInt16x16: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x16(v) + case OpShiftRightAndFillUpperFromMaskedInt16x32: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x32(v) + case OpShiftRightAndFillUpperFromMaskedInt16x8: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x8(v) + case OpShiftRightAndFillUpperFromMaskedInt32x16: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x16(v) + case OpShiftRightAndFillUpperFromMaskedInt32x4: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x4(v) + case OpShiftRightAndFillUpperFromMaskedInt32x8: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x8(v) + case OpShiftRightAndFillUpperFromMaskedInt64x2: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x2(v) + case OpShiftRightAndFillUpperFromMaskedInt64x4: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x4(v) + case OpShiftRightAndFillUpperFromMaskedInt64x8: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x8(v) + case OpShiftRightAndFillUpperFromMaskedUint16x16: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x16(v) + case OpShiftRightAndFillUpperFromMaskedUint16x32: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x32(v) + case OpShiftRightAndFillUpperFromMaskedUint16x8: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x8(v) + case OpShiftRightAndFillUpperFromMaskedUint32x16: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x16(v) + case OpShiftRightAndFillUpperFromMaskedUint32x4: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x4(v) + case OpShiftRightAndFillUpperFromMaskedUint32x8: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x8(v) + case OpShiftRightAndFillUpperFromMaskedUint64x2: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x2(v) + case OpShiftRightAndFillUpperFromMaskedUint64x4: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x4(v) + case OpShiftRightAndFillUpperFromMaskedUint64x8: + return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x8(v) case OpShiftRightAndFillUpperFromUint16x16: v.Op = OpAMD64VPSHRDVW256 return true @@ -4731,6 +4539,42 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftRightInt64x8: v.Op = OpAMD64VPSRLVQ512 return true + case OpShiftRightMaskedInt16x16: + return rewriteValueAMD64_OpShiftRightMaskedInt16x16(v) + case OpShiftRightMaskedInt16x32: + return rewriteValueAMD64_OpShiftRightMaskedInt16x32(v) + case OpShiftRightMaskedInt16x8: + return rewriteValueAMD64_OpShiftRightMaskedInt16x8(v) + case OpShiftRightMaskedInt32x16: + return rewriteValueAMD64_OpShiftRightMaskedInt32x16(v) + case OpShiftRightMaskedInt32x4: + return rewriteValueAMD64_OpShiftRightMaskedInt32x4(v) + case OpShiftRightMaskedInt32x8: + return rewriteValueAMD64_OpShiftRightMaskedInt32x8(v) + case OpShiftRightMaskedInt64x2: + 
return rewriteValueAMD64_OpShiftRightMaskedInt64x2(v) + case OpShiftRightMaskedInt64x4: + return rewriteValueAMD64_OpShiftRightMaskedInt64x4(v) + case OpShiftRightMaskedInt64x8: + return rewriteValueAMD64_OpShiftRightMaskedInt64x8(v) + case OpShiftRightMaskedUint16x16: + return rewriteValueAMD64_OpShiftRightMaskedUint16x16(v) + case OpShiftRightMaskedUint16x32: + return rewriteValueAMD64_OpShiftRightMaskedUint16x32(v) + case OpShiftRightMaskedUint16x8: + return rewriteValueAMD64_OpShiftRightMaskedUint16x8(v) + case OpShiftRightMaskedUint32x16: + return rewriteValueAMD64_OpShiftRightMaskedUint32x16(v) + case OpShiftRightMaskedUint32x4: + return rewriteValueAMD64_OpShiftRightMaskedUint32x4(v) + case OpShiftRightMaskedUint32x8: + return rewriteValueAMD64_OpShiftRightMaskedUint32x8(v) + case OpShiftRightMaskedUint64x2: + return rewriteValueAMD64_OpShiftRightMaskedUint64x2(v) + case OpShiftRightMaskedUint64x4: + return rewriteValueAMD64_OpShiftRightMaskedUint64x4(v) + case OpShiftRightMaskedUint64x8: + return rewriteValueAMD64_OpShiftRightMaskedUint64x8(v) case OpShiftRightSignExtendedInt16x16: v.Op = OpAMD64VPSRAVW256 return true @@ -4758,6 +4602,42 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftRightSignExtendedInt64x8: v.Op = OpAMD64VPSRAVQ512 return true + case OpShiftRightSignExtendedMaskedInt16x16: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x16(v) + case OpShiftRightSignExtendedMaskedInt16x32: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x32(v) + case OpShiftRightSignExtendedMaskedInt16x8: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x8(v) + case OpShiftRightSignExtendedMaskedInt32x16: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x16(v) + case OpShiftRightSignExtendedMaskedInt32x4: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x4(v) + case OpShiftRightSignExtendedMaskedInt32x8: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x8(v) + case OpShiftRightSignExtendedMaskedInt64x2: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x2(v) + case OpShiftRightSignExtendedMaskedInt64x4: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x4(v) + case OpShiftRightSignExtendedMaskedInt64x8: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x8(v) + case OpShiftRightSignExtendedMaskedUint16x16: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x16(v) + case OpShiftRightSignExtendedMaskedUint16x32: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x32(v) + case OpShiftRightSignExtendedMaskedUint16x8: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x8(v) + case OpShiftRightSignExtendedMaskedUint32x16: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x16(v) + case OpShiftRightSignExtendedMaskedUint32x4: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x4(v) + case OpShiftRightSignExtendedMaskedUint32x8: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x8(v) + case OpShiftRightSignExtendedMaskedUint64x2: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x2(v) + case OpShiftRightSignExtendedMaskedUint64x4: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x4(v) + case OpShiftRightSignExtendedMaskedUint64x8: + return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x8(v) case OpShiftRightSignExtendedUint16x16: v.Op = OpAMD64VPSRAVW256 return true @@ -4878,6 +4758,18 @@ func rewriteValueAMD64(v *Value) bool { case OpSqrtFloat64x8: v.Op = OpAMD64VSQRTPD512 
return true + case OpSqrtMaskedFloat32x16: + return rewriteValueAMD64_OpSqrtMaskedFloat32x16(v) + case OpSqrtMaskedFloat32x4: + return rewriteValueAMD64_OpSqrtMaskedFloat32x4(v) + case OpSqrtMaskedFloat32x8: + return rewriteValueAMD64_OpSqrtMaskedFloat32x8(v) + case OpSqrtMaskedFloat64x2: + return rewriteValueAMD64_OpSqrtMaskedFloat64x2(v) + case OpSqrtMaskedFloat64x4: + return rewriteValueAMD64_OpSqrtMaskedFloat64x4(v) + case OpSqrtMaskedFloat64x8: + return rewriteValueAMD64_OpSqrtMaskedFloat64x8(v) case OpStaticCall: v.Op = OpAMD64CALLstatic return true @@ -4955,6 +4847,66 @@ func rewriteValueAMD64(v *Value) bool { case OpSubInt8x64: v.Op = OpAMD64VPSUBB512 return true + case OpSubMaskedFloat32x16: + return rewriteValueAMD64_OpSubMaskedFloat32x16(v) + case OpSubMaskedFloat32x4: + return rewriteValueAMD64_OpSubMaskedFloat32x4(v) + case OpSubMaskedFloat32x8: + return rewriteValueAMD64_OpSubMaskedFloat32x8(v) + case OpSubMaskedFloat64x2: + return rewriteValueAMD64_OpSubMaskedFloat64x2(v) + case OpSubMaskedFloat64x4: + return rewriteValueAMD64_OpSubMaskedFloat64x4(v) + case OpSubMaskedFloat64x8: + return rewriteValueAMD64_OpSubMaskedFloat64x8(v) + case OpSubMaskedInt16x16: + return rewriteValueAMD64_OpSubMaskedInt16x16(v) + case OpSubMaskedInt16x32: + return rewriteValueAMD64_OpSubMaskedInt16x32(v) + case OpSubMaskedInt16x8: + return rewriteValueAMD64_OpSubMaskedInt16x8(v) + case OpSubMaskedInt32x16: + return rewriteValueAMD64_OpSubMaskedInt32x16(v) + case OpSubMaskedInt32x4: + return rewriteValueAMD64_OpSubMaskedInt32x4(v) + case OpSubMaskedInt32x8: + return rewriteValueAMD64_OpSubMaskedInt32x8(v) + case OpSubMaskedInt64x2: + return rewriteValueAMD64_OpSubMaskedInt64x2(v) + case OpSubMaskedInt64x4: + return rewriteValueAMD64_OpSubMaskedInt64x4(v) + case OpSubMaskedInt64x8: + return rewriteValueAMD64_OpSubMaskedInt64x8(v) + case OpSubMaskedInt8x16: + return rewriteValueAMD64_OpSubMaskedInt8x16(v) + case OpSubMaskedInt8x32: + return rewriteValueAMD64_OpSubMaskedInt8x32(v) + case OpSubMaskedInt8x64: + return rewriteValueAMD64_OpSubMaskedInt8x64(v) + case OpSubMaskedUint16x16: + return rewriteValueAMD64_OpSubMaskedUint16x16(v) + case OpSubMaskedUint16x32: + return rewriteValueAMD64_OpSubMaskedUint16x32(v) + case OpSubMaskedUint16x8: + return rewriteValueAMD64_OpSubMaskedUint16x8(v) + case OpSubMaskedUint32x16: + return rewriteValueAMD64_OpSubMaskedUint32x16(v) + case OpSubMaskedUint32x4: + return rewriteValueAMD64_OpSubMaskedUint32x4(v) + case OpSubMaskedUint32x8: + return rewriteValueAMD64_OpSubMaskedUint32x8(v) + case OpSubMaskedUint64x2: + return rewriteValueAMD64_OpSubMaskedUint64x2(v) + case OpSubMaskedUint64x4: + return rewriteValueAMD64_OpSubMaskedUint64x4(v) + case OpSubMaskedUint64x8: + return rewriteValueAMD64_OpSubMaskedUint64x8(v) + case OpSubMaskedUint8x16: + return rewriteValueAMD64_OpSubMaskedUint8x16(v) + case OpSubMaskedUint8x32: + return rewriteValueAMD64_OpSubMaskedUint8x32(v) + case OpSubMaskedUint8x64: + return rewriteValueAMD64_OpSubMaskedUint8x64(v) case OpSubPtr: v.Op = OpAMD64SUBQ return true @@ -5037,6 +4989,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v) case OpTruncWithPrecisionFloat64x8: return rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v) + case OpTruncWithPrecisionMaskedFloat32x16: + return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x16(v) + case OpTruncWithPrecisionMaskedFloat32x4: + return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x4(v) + case OpTruncWithPrecisionMaskedFloat32x8: + 
return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x8(v) + case OpTruncWithPrecisionMaskedFloat64x2: + return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x2(v) + case OpTruncWithPrecisionMaskedFloat64x4: + return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x4(v) + case OpTruncWithPrecisionMaskedFloat64x8: + return rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x8(v) case OpUnsignedSignedQuadDotProdAccumulateInt32x16: v.Op = OpAMD64VPDPBUSD512 return true @@ -5046,6 +5010,18 @@ func rewriteValueAMD64(v *Value) bool { case OpUnsignedSignedQuadDotProdAccumulateInt32x8: v.Op = OpAMD64VPDPBUSD256 return true + case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16: + return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v) + case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4: + return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v) + case OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8: + return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v) + case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16: + return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v) + case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4: + return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v) + case OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8: + return rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v) case OpUnsignedSignedQuadDotProdAccumulateUint32x16: v.Op = OpAMD64VPDPBUSD512 return true @@ -5100,6 +5076,30 @@ func rewriteValueAMD64(v *Value) bool { case OpXorInt8x32: v.Op = OpAMD64VPXOR256 return true + case OpXorMaskedInt32x16: + return rewriteValueAMD64_OpXorMaskedInt32x16(v) + case OpXorMaskedInt32x4: + return rewriteValueAMD64_OpXorMaskedInt32x4(v) + case OpXorMaskedInt32x8: + return rewriteValueAMD64_OpXorMaskedInt32x8(v) + case OpXorMaskedInt64x2: + return rewriteValueAMD64_OpXorMaskedInt64x2(v) + case OpXorMaskedInt64x4: + return rewriteValueAMD64_OpXorMaskedInt64x4(v) + case OpXorMaskedInt64x8: + return rewriteValueAMD64_OpXorMaskedInt64x8(v) + case OpXorMaskedUint32x16: + return rewriteValueAMD64_OpXorMaskedUint32x16(v) + case OpXorMaskedUint32x4: + return rewriteValueAMD64_OpXorMaskedUint32x4(v) + case OpXorMaskedUint32x8: + return rewriteValueAMD64_OpXorMaskedUint32x8(v) + case OpXorMaskedUint64x2: + return rewriteValueAMD64_OpXorMaskedUint64x2(v) + case OpXorMaskedUint64x4: + return rewriteValueAMD64_OpXorMaskedUint64x4(v) + case OpXorMaskedUint64x8: + return rewriteValueAMD64_OpXorMaskedUint64x8(v) case OpXorUint16x16: v.Op = OpAMD64VPXOR256 return true @@ -27834,8704 +27834,8578 @@ func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool { } return false } -func rewriteValueAMD64_OpAddr(v *Value) bool { +func rewriteValueAMD64_OpAbsoluteMaskedInt16x16(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Addr {sym} base) - // result: (LEAQ {sym} base) + b := v.Block + // match: (AbsoluteMaskedInt16x16 x mask) + // result: (VPABSWMasked256 x (VPMOVVec16x16ToM mask)) for { - sym := auxToSym(v.Aux) - base := v_0 - v.reset(OpAMD64LEAQ) - v.Aux = symToAux(sym) - v.AddArg(base) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt16x32(v *Value) bool { v_1 := 
v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicAdd32 ptr val mem) - // result: (AddTupleFirst32 val (XADDLlock val ptr mem)) + // match: (AbsoluteMaskedInt16x32 x mask) + // result: (VPABSWMasked512 x (VPMOVVec16x32ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64AddTupleFirst32) - v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg2(val, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicAdd64 ptr val mem) - // result: (AddTupleFirst64 val (XADDQlock val ptr mem)) + // match: (AbsoluteMaskedInt16x8 x mask) + // result: (VPABSWMasked128 x (VPMOVVec16x8ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64AddTupleFirst64) - v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg2(val, v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicAnd32 ptr val mem) - // result: (ANDLlock ptr val mem) + b := v.Block + // match: (AbsoluteMaskedInt32x16 x mask) + // result: (VPABSDMasked512 x (VPMOVVec32x16ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64ANDLlock) - v.AddArg3(ptr, val, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicAnd32value(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicAnd32value ptr val mem) - // result: (LoweredAtomicAnd32 ptr val mem) + b := v.Block + // match: (AbsoluteMaskedInt32x4 x mask) + // result: (VPABSDMasked128 x (VPMOVVec32x4ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64LoweredAtomicAnd32) - v.AddArg3(ptr, val, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicAnd64value(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicAnd64value ptr val mem) - // result: (LoweredAtomicAnd64 ptr val mem) + b := v.Block + // match: (AbsoluteMaskedInt32x8 x mask) + // result: (VPABSDMasked256 x (VPMOVVec32x8ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64LoweredAtomicAnd64) - v.AddArg3(ptr, val, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool { - v_2 := v.Args[2] +func 
rewriteValueAMD64_OpAbsoluteMaskedInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicAnd8 ptr val mem) - // result: (ANDBlock ptr val mem) + b := v.Block + // match: (AbsoluteMaskedInt64x2 x mask) + // result: (VPABSQMasked128 x (VPMOVVec64x2ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64ANDBlock) - v.AddArg3(ptr, val, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicCompareAndSwap32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicCompareAndSwap32 ptr old new_ mem) - // result: (CMPXCHGLlock ptr old new_ mem) + b := v.Block + // match: (AbsoluteMaskedInt64x4 x mask) + // result: (VPABSQMasked256 x (VPMOVVec64x4ToM mask)) for { - ptr := v_0 - old := v_1 - new_ := v_2 - mem := v_3 - v.reset(OpAMD64CMPXCHGLlock) - v.AddArg4(ptr, old, new_, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicCompareAndSwap64(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicCompareAndSwap64 ptr old new_ mem) - // result: (CMPXCHGQlock ptr old new_ mem) + b := v.Block + // match: (AbsoluteMaskedInt64x8 x mask) + // result: (VPABSQMasked512 x (VPMOVVec64x8ToM mask)) for { - ptr := v_0 - old := v_1 - new_ := v_2 - mem := v_3 - v.reset(OpAMD64CMPXCHGQlock) - v.AddArg4(ptr, old, new_, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicExchange32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicExchange32 ptr val mem) - // result: (XCHGL val ptr mem) + b := v.Block + // match: (AbsoluteMaskedInt8x16 x mask) + // result: (VPABSBMasked128 x (VPMOVVec8x16ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64XCHGL) - v.AddArg3(val, ptr, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicExchange64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicExchange64 ptr val mem) - // result: (XCHGQ val ptr mem) + b := v.Block + // match: (AbsoluteMaskedInt8x32 x mask) + // result: (VPABSBMasked256 x (VPMOVVec8x32ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64XCHGQ) - v.AddArg3(val, ptr, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicExchange8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAbsoluteMaskedInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicExchange8 ptr val mem) - // result: (XCHGB val ptr mem) + b := v.Block + // 
match: (AbsoluteMaskedInt8x64 x mask) + // result: (VPABSBMasked512 x (VPMOVVec8x64ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64XCHGB) - v.AddArg3(val, ptr, mem) + x := v_0 + mask := v_1 + v.reset(OpAMD64VPABSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpAtomicLoad32(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedFloat32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicLoad32 ptr mem) - // result: (MOVLatomicload ptr mem) + b := v.Block + // match: (AddMaskedFloat32x16 x y mask) + // result: (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) for { - ptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVLatomicload) - v.AddArg2(ptr, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VADDPSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicLoad64(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedFloat32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicLoad64 ptr mem) - // result: (MOVQatomicload ptr mem) + b := v.Block + // match: (AddMaskedFloat32x4 x y mask) + // result: (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) for { - ptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVQatomicload) - v.AddArg2(ptr, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VADDPSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicLoad8(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedFloat32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicLoad8 ptr mem) - // result: (MOVBatomicload ptr mem) + b := v.Block + // match: (AddMaskedFloat32x8 x y mask) + // result: (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) for { - ptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVBatomicload) - v.AddArg2(ptr, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VADDPSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedFloat64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicLoadPtr ptr mem) - // result: (MOVQatomicload ptr mem) + b := v.Block + // match: (AddMaskedFloat64x2 x y mask) + // result: (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) for { - ptr := v_0 - mem := v_1 - v.reset(OpAMD64MOVQatomicload) - v.AddArg2(ptr, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VADDPDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicOr32(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicOr32 ptr val mem) - // result: (ORLlock ptr val mem) + b := v.Block + // match: (AddMaskedFloat64x4 x y mask) + // result: (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64ORLlock) - v.AddArg3(ptr, val, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VADDPDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, 
v0) return true } } -func rewriteValueAMD64_OpAtomicOr32value(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicOr32value ptr val mem) - // result: (LoweredAtomicOr32 ptr val mem) + b := v.Block + // match: (AddMaskedFloat64x8 x y mask) + // result: (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64LoweredAtomicOr32) - v.AddArg3(ptr, val, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VADDPDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicOr64value(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicOr64value ptr val mem) - // result: (LoweredAtomicOr64 ptr val mem) + b := v.Block + // match: (AddMaskedInt16x16 x y mask) + // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64LoweredAtomicOr64) - v.AddArg3(ptr, val, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicOr8(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (AtomicOr8 ptr val mem) - // result: (ORBlock ptr val mem) + b := v.Block + // match: (AddMaskedInt16x32 x y mask) + // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpAMD64ORBlock) - v.AddArg3(ptr, val, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicStore32(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicStore32 ptr val mem) - // result: (Select1 (XCHGL val ptr mem)) + // match: (AddMaskedInt16x8 x y mask) + // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg(v0) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicStore64(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicStore64 ptr val mem) - // result: (Select1 (XCHGQ val ptr mem)) + // match: (AddMaskedInt32x16 x y mask) + // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg(v0) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, 
v0) return true } } -func rewriteValueAMD64_OpAtomicStore8(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicStore8 ptr val mem) - // result: (Select1 (XCHGB val ptr mem)) + // match: (AddMaskedInt32x4 x y mask) + // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64XCHGB, types.NewTuple(typ.UInt8, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg(v0) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (AtomicStorePtrNoWB ptr val mem) - // result: (Select1 (XCHGQ val ptr mem)) + // match: (AddMaskedInt32x8 x y mask) + // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) for { - ptr := v_0 - val := v_1 - mem := v_2 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem)) - v0.AddArg3(val, ptr, mem) - v.AddArg(v0) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpBitLen16(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (BitLen16 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSRL (LEAL1 [1] (MOVWQZX x) (MOVWQZX x))) + // match: (AddMaskedInt64x2 x y mask) + // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpAMD64BSRL) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32) - v0.AuxInt = int32ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32) - v1.AddArg(x) - v0.AddArg2(v1, v1) - v.AddArg(v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (BitLen16 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (NEGQ (ADDQconst [-32] (LZCNTL (MOVWQZX x)))) +} +func rewriteValueAMD64_OpAddMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AddMaskedInt64x4 x y mask) + // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) - v0.AuxInt = int32ToAuxInt(-32) - v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) - v2 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, x.Type) - v2.AddArg(x) - v1.AddArg(v2) - v0.AddArg(v1) - v.AddArg(v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpBitLen32(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := 
&b.Func.Config.Types - // match: (BitLen32 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (Select0 (BSRQ (LEAQ1 [1] (MOVLQZX x) (MOVLQZX x)))) + // match: (AddMaskedInt64x8 x y mask) + // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1 := b.NewValue0(v.Pos, OpAMD64LEAQ1, typ.UInt64) - v1.AuxInt = int32ToAuxInt(1) - v2 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64) - v2.AddArg(x) - v1.AddArg2(v2, v2) - v0.AddArg(v1) - v.AddArg(v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (BitLen32 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (NEGQ (ADDQconst [-32] (LZCNTL x))) +} +func rewriteValueAMD64_OpAddMaskedInt8x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AddMaskedInt8x16 x y mask) + // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) for { - t := v.Type x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) - v0.AuxInt = int32ToAuxInt(-32) - v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) - v1.AddArg(x) - v0.AddArg(v1) - v.AddArg(v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpBitLen64(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedInt8x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (BitLen64 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (ADDQconst [1] (CMOVQEQ (Select0 (BSRQ x)) (MOVQconst [-1]) (Select1 (BSRQ x)))) + // match: (AddMaskedInt8x32 x y mask) + // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) for { - t := v.Type x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpAMD64ADDQconst) - v.AuxInt = int32ToAuxInt(1) - v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t) - v1 := b.NewValue0(v.Pos, OpSelect0, t) - v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v2.AddArg(x) - v1.AddArg(v2) - v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t) - v3.AuxInt = int64ToAuxInt(-1) - v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4.AddArg(v2) - v0.AddArg3(v1, v3, v4) - v.AddArg(v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (BitLen64 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (NEGQ (ADDQconst [-64] (LZCNTQ x))) +} +func rewriteValueAMD64_OpAddMaskedInt8x64(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AddMaskedInt8x64 x y mask) + // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) for { - t := v.Type x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) - v0.AuxInt = int32ToAuxInt(-64) - v1 := b.NewValue0(v.Pos, OpAMD64LZCNTQ, typ.UInt64) - v1.AddArg(x) - v0.AddArg(v1) - v.AddArg(v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) 
return true } - return false } -func rewriteValueAMD64_OpBitLen8(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (BitLen8 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSRL (LEAL1 [1] (MOVBQZX x) (MOVBQZX x))) + // match: (AddMaskedUint16x16 x y mask) + // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpAMD64BSRL) - v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32) - v0.AuxInt = int32ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32) - v1.AddArg(x) - v0.AddArg2(v1, v1) - v.AddArg(v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (BitLen8 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (NEGQ (ADDQconst [-32] (LZCNTL (MOVBQZX x)))) +} +func rewriteValueAMD64_OpAddMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AddMaskedUint16x32 x y mask) + // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) for { - t := v.Type x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64NEGQ) - v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) - v0.AuxInt = int32ToAuxInt(-32) - v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) - v2 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, x.Type) - v2.AddArg(x) - v1.AddArg(v2) - v0.AddArg(v1) - v.AddArg(v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpBswap16(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Bswap16 x) - // result: (ROLWconst [8] x) + b := v.Block + // match: (AddMaskedUint16x8 x y mask) + // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 - v.reset(OpAMD64ROLWconst) - v.AuxInt = int8ToAuxInt(8) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeil(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Ceil x) - // result: (ROUNDSD [2] x) + b := v.Block + // match: (AddMaskedUint32x16 x y mask) + // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 - v.reset(OpAMD64ROUNDSD) - v.AuxInt = int8ToAuxInt(2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilFloat32x4 x) - // result: (VROUNDPS128 [2] x) + b := v.Block + // match: (AddMaskedUint32x4 x y mask) + // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPS128) - v.AuxInt = int8ToAuxInt(2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilFloat32x8 x) - // result: (VROUNDPS256 [2] x) + b := v.Block + // match: (AddMaskedUint32x8 x y mask) + // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPS256) - v.AuxInt = int8ToAuxInt(2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilFloat64x2 x) - // result: (VROUNDPD128 [2] x) + b := v.Block + // match: (AddMaskedUint64x2 x y mask) + // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPD128) - v.AuxInt = int8ToAuxInt(2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilFloat64x4 x) - // result: (VROUNDPD256 [2] x) + b := v.Block + // match: (AddMaskedUint64x4 x y mask) + // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPD256) - v.AuxInt = int8ToAuxInt(2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilWithPrecisionFloat32x16 [a] x) - // result: (VRNDSCALEPS512 [a+2] x) + b := v.Block + // match: (AddMaskedUint64x8 x y mask) + // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS512) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint8x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilWithPrecisionFloat32x4 [a] x) - // result: (VRNDSCALEPS128 [a+2] x) + b := v.Block + // match: (AddMaskedUint8x16 x y mask) + // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS128) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint8x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilWithPrecisionFloat32x8 [a] x) - 
// result: (VRNDSCALEPS256 [a+2] x) + b := v.Block + // match: (AddMaskedUint8x32 x y mask) + // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS256) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAddMaskedUint8x64(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilWithPrecisionFloat64x2 [a] x) - // result: (VRNDSCALEPD128 [a+2] x) + b := v.Block + // match: (AddMaskedUint8x64 x y mask) + // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD128) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAddr(v *Value) bool { v_0 := v.Args[0] - // match: (CeilWithPrecisionFloat64x4 [a] x) - // result: (VRNDSCALEPD256 [a+2] x) + // match: (Addr {sym} base) + // result: (LEAQ {sym} base) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VRNDSCALEPD256) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + sym := auxToSym(v.Aux) + base := v_0 + v.reset(OpAMD64LEAQ) + v.Aux = symToAux(sym) + v.AddArg(base) return true } } -func rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAndMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (CeilWithPrecisionFloat64x8 [a] x) - // result: (VRNDSCALEPD512 [a+2] x) + b := v.Block + // match: (AndMaskedInt32x16 x y mask) + // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD512) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPANDDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpCondSelect(v *Value) bool { +func rewriteValueAMD64_OpAndMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (CondSelect x y (SETEQ cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQEQ y x cond) + // match: (AndMaskedInt32x4 x y mask) + // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETEQ { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQEQ) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETNE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNE y x cond) +} +func rewriteValueAMD64_OpAndMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedInt32x8 x y mask) + // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op 
!= OpAMD64SETNE { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQNE) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETL cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQLT y x cond) +} +func rewriteValueAMD64_OpAndMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedInt64x2 x y mask) + // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETL { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQLT) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETG cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQGT y x cond) +} +func rewriteValueAMD64_OpAndMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedInt64x4 x y mask) + // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETG { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQGT) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETLE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQLE y x cond) +} +func rewriteValueAMD64_OpAndMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedInt64x8 x y mask) + // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETLE { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQLE) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETGE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQGE y x cond) +} +func rewriteValueAMD64_OpAndMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedUint32x16 x y mask) + // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETGE { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQGE) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETA cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQHI y x cond) +} +func rewriteValueAMD64_OpAndMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedUint32x4 x y mask) + // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) for { - t := v.Type x := 
v_0 y := v_1 - if v_2.Op != OpAMD64SETA { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQHI) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETB cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQCS y x cond) +} +func rewriteValueAMD64_OpAndMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedUint32x8 x y mask) + // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETB { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQCS) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETAE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQCC y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETAE { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQCC) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETBE cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQLS y x cond) +} +func rewriteValueAMD64_OpAndMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedUint64x2 x y mask) + // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETBE { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQLS) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETEQF cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQEQF y x cond) +} +func rewriteValueAMD64_OpAndMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedUint64x4 x y mask) + // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETEQF { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQEQF) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETNEF cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNEF y x cond) +} +func rewriteValueAMD64_OpAndMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndMaskedUint64x8 x y mask) + // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETNEF { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQNEF) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } 
- // match: (CondSelect x y (SETGF cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQGTF y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedInt32x16 x y mask) + // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETGF { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQGTF) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETGEF cond)) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (CMOVQGEF y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedInt32x4 x y mask) + // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETGEF { - break - } - cond := v_2.Args[0] - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64CMOVQGEF) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETEQ cond)) - // cond: is32BitInt(t) - // result: (CMOVLEQ y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedInt32x8 x y mask) + // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETEQ { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLEQ) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETNE cond)) - // cond: is32BitInt(t) - // result: (CMOVLNE y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedInt64x2 x y mask) + // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETNE { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLNE) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETL cond)) - // cond: is32BitInt(t) - // result: (CMOVLLT y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedInt64x4 x y mask) + // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETL { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLLT) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // 
match: (CondSelect x y (SETG cond)) - // cond: is32BitInt(t) - // result: (CMOVLGT y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedInt64x8 x y mask) + // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETG { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLGT) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETLE cond)) - // cond: is32BitInt(t) - // result: (CMOVLLE y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedUint32x16 x y mask) + // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETLE { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLLE) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETGE cond)) - // cond: is32BitInt(t) - // result: (CMOVLGE y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedUint32x4 x y mask) + // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETGE { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLGE) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETA cond)) - // cond: is32BitInt(t) - // result: (CMOVLHI y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedUint32x8 x y mask) + // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETA { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLHI) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETB cond)) - // cond: is32BitInt(t) - // result: (CMOVLCS y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedUint64x2 x y mask) + // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETB { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLCS) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETAE cond)) - // cond: is32BitInt(t) - 
// result: (CMOVLCC y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedUint64x4 x y mask) + // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETAE { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLCC) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETBE cond)) - // cond: is32BitInt(t) - // result: (CMOVLLS y x cond) +} +func rewriteValueAMD64_OpAndNotMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (AndNotMaskedUint64x8 x y mask) + // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if v_2.Op != OpAMD64SETBE { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLLS) - v.AddArg3(y, x, cond) + mask := v_2 + v.reset(OpAMD64VPANDNQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (CondSelect x y (SETEQF cond)) - // cond: is32BitInt(t) - // result: (CMOVLEQF y x cond) +} +func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ApproximateReciprocalMaskedFloat32x16 x mask) + // result: (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQF { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLEQF) - v.AddArg3(y, x, cond) + mask := v_1 + v.reset(OpAMD64VRCP14PSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (CondSelect x y (SETNEF cond)) - // cond: is32BitInt(t) - // result: (CMOVLNEF y x cond) +} +func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ApproximateReciprocalMaskedFloat32x4 x mask) + // result: (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNEF { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLNEF) - v.AddArg3(y, x, cond) + mask := v_1 + v.reset(OpAMD64VRCP14PSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (CondSelect x y (SETGF cond)) - // cond: is32BitInt(t) - // result: (CMOVLGTF y x cond) +} +func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ApproximateReciprocalMaskedFloat32x8 x mask) + // result: (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGF { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLGTF) - v.AddArg3(y, x, cond) + mask := v_1 + v.reset(OpAMD64VRCP14PSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (CondSelect x y (SETGEF cond)) - // cond: is32BitInt(t) - // result: 
(CMOVLGEF y x cond) +} +func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ApproximateReciprocalMaskedFloat64x2 x mask) + // result: (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGEF { - break - } - cond := v_2.Args[0] - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLGEF) - v.AddArg3(y, x, cond) + mask := v_1 + v.reset(OpAMD64VRCP14PDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (CondSelect x y (SETEQ cond)) - // cond: is16BitInt(t) - // result: (CMOVWEQ y x cond) +} +func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ApproximateReciprocalMaskedFloat64x4 x mask) + // result: (VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQ { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWEQ) - v.AddArg3(y, x, cond) + mask := v_1 + v.reset(OpAMD64VRCP14PDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (CondSelect x y (SETNE cond)) - // cond: is16BitInt(t) - // result: (CMOVWNE y x cond) +} +func rewriteValueAMD64_OpApproximateReciprocalMaskedFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ApproximateReciprocalMaskedFloat64x8 x mask) + // result: (VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNE { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWNE) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETL cond)) - // cond: is16BitInt(t) - // result: (CMOVWLT y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETL { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWLT) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETG cond)) - // cond: is16BitInt(t) - // result: (CMOVWGT y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETG { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWGT) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETLE cond)) - // cond: is16BitInt(t) - // result: (CMOVWLE y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETLE { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWLE) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETGE cond)) - // cond: is16BitInt(t) - // result: (CMOVWGE y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGE { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWGE) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETA cond)) - // cond: is16BitInt(t) - // result: (CMOVWHI y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETA { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWHI) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETB cond)) - // cond: is16BitInt(t) - // result: (CMOVWCS y x cond) - for { 
- t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETB { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWCS) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETAE cond)) - // cond: is16BitInt(t) - // result: (CMOVWCC y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETAE { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWCC) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETBE cond)) - // cond: is16BitInt(t) - // result: (CMOVWLS y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETBE { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWLS) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETEQF cond)) - // cond: is16BitInt(t) - // result: (CMOVWEQF y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETEQF { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWEQF) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETNEF cond)) - // cond: is16BitInt(t) - // result: (CMOVWNEF y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETNEF { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWNEF) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETGF cond)) - // cond: is16BitInt(t) - // result: (CMOVWGTF y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGF { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWGTF) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y (SETGEF cond)) - // cond: is16BitInt(t) - // result: (CMOVWGEF y x cond) - for { - t := v.Type - x := v_0 - y := v_1 - if v_2.Op != OpAMD64SETGEF { - break - } - cond := v_2.Args[0] - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWGEF) - v.AddArg3(y, x, cond) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 1 - // result: (CondSelect x y (MOVBQZX check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 1) { - break - } - v.reset(OpCondSelect) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64) - v0.AddArg(check) - v.AddArg3(x, y, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 2 - // result: (CondSelect x y (MOVWQZX check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 2) { - break - } - v.reset(OpCondSelect) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64) - v0.AddArg(check) - v.AddArg3(x, y, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 4 - // result: (CondSelect x y (MOVLQZX check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 4) { - break - } - v.reset(OpCondSelect) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64) - v0.AddArg(check) - v.AddArg3(x, y, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t)) - // result: (CMOVQNE y x (CMPQconst [0] check)) - for { - t := v.Type - x := v_0 - y 
:= v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) { - break - } - v.reset(OpAMD64CMOVQNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t) - // result: (CMOVLNE y x (CMPQconst [0] check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) { - break - } - v.reset(OpAMD64CMOVLNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - // match: (CondSelect x y check) - // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t) - // result: (CMOVWNE y x (CMPQconst [0] check)) - for { - t := v.Type - x := v_0 - y := v_1 - check := v_2 - if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) { - break - } - v.reset(OpAMD64CMOVWNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - v0.AddArg(check) - v.AddArg3(y, x, v0) - return true - } - return false -} -func rewriteValueAMD64_OpConst16(v *Value) bool { - // match: (Const16 [c]) - // result: (MOVLconst [int32(c)]) - for { - c := auxIntToInt16(v.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(int32(c)) - return true - } -} -func rewriteValueAMD64_OpConst8(v *Value) bool { - // match: (Const8 [c]) - // result: (MOVLconst [int32(c)]) - for { - c := auxIntToInt8(v.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(int32(c)) - return true - } -} -func rewriteValueAMD64_OpConstBool(v *Value) bool { - // match: (ConstBool [c]) - // result: (MOVLconst [b2i32(c)]) - for { - c := auxIntToBool(v.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(b2i32(c)) - return true - } -} -func rewriteValueAMD64_OpConstNil(v *Value) bool { - // match: (ConstNil ) - // result: (MOVQconst [0]) - for { - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64ToAuxInt(0) + mask := v_1 + v.reset(OpAMD64VRCP14PDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpCtz16(v *Value) bool { +func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x16(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz16 x) - // result: (BSFL (ORLconst [1<<16] x)) + // match: (ApproximateReciprocalOfSqrtMaskedFloat32x16 x mask) + // result: (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) for { x := v_0 - v.reset(OpAMD64BSFL) - v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) - v0.AuxInt = int32ToAuxInt(1 << 16) - v0.AddArg(x) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VRSQRT14PSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpCtz16NonZero(v *Value) bool { +func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x4(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Ctz16NonZero x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTL x) - for { - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64TZCNTL) - v.AddArg(x) - return true - } - // match: (Ctz16NonZero x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSFL x) + b := 
v.Block + // match: (ApproximateReciprocalOfSqrtMaskedFloat32x4 x mask) + // result: (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) for { x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpAMD64BSFL) - v.AddArg(x) + mask := v_1 + v.reset(OpAMD64VRSQRT14PSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpCtz32(v *Value) bool { +func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat32x8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz32 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTL x) - for { - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64TZCNTL) - v.AddArg(x) - return true - } - // match: (Ctz32 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (Select0 (BSFQ (BTSQconst [32] x))) + // match: (ApproximateReciprocalOfSqrtMaskedFloat32x8 x mask) + // result: (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) for { x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1 := b.NewValue0(v.Pos, OpAMD64BTSQconst, typ.UInt64) - v1.AuxInt = int8ToAuxInt(32) - v1.AddArg(x) - v0.AddArg(v1) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VRSQRT14PSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpCtz32NonZero(v *Value) bool { +func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x2(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Ctz32NonZero x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTL x) - for { - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64TZCNTL) - v.AddArg(x) - return true - } - // match: (Ctz32NonZero x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSFL x) + b := v.Block + // match: (ApproximateReciprocalOfSqrtMaskedFloat64x2 x mask) + // result: (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM mask)) for { x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpAMD64BSFL) - v.AddArg(x) + mask := v_1 + v.reset(OpAMD64VRSQRT14PDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpCtz64(v *Value) bool { +func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x4(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz64 x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTQ x) - for { - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64TZCNTQ) - v.AddArg(x) - return true - } - // match: (Ctz64 x) - // cond: buildcfg.GOAMD64 < 3 - // result: (CMOVQEQ (Select0 (BSFQ x)) (MOVQconst [64]) (Select1 (BSFQ x))) + // match: (ApproximateReciprocalOfSqrtMaskedFloat64x4 x mask) + // result: (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpAMD64CMOVQEQ) - v0 := b.NewValue0(v.Pos, OpSelect0, t) - v1 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1.AddArg(x) - v0.AddArg(v1) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t) - v2.AuxInt = int64ToAuxInt(64) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v3.AddArg(v1) - v.AddArg3(v0, v2, v3) + mask := v_1 + 
v.reset(OpAMD64VRSQRT14PDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpCtz64NonZero(v *Value) bool { +func rewriteValueAMD64_OpApproximateReciprocalOfSqrtMaskedFloat64x8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Ctz64NonZero x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTQ x) - for { - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64TZCNTQ) - v.AddArg(x) - return true - } - // match: (Ctz64NonZero x) - // cond: buildcfg.GOAMD64 < 3 - // result: (Select0 (BSFQ x)) + // match: (ApproximateReciprocalOfSqrtMaskedFloat64x8 x mask) + // result: (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) for { x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v0.AddArg(x) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VRSQRT14PDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpCtz8(v *Value) bool { +func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Ctz8 x) - // result: (BSFL (ORLconst [1<<8 ] x)) - for { - x := v_0 - v.reset(OpAMD64BSFL) - v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) - v0.AuxInt = int32ToAuxInt(1 << 8) - v0.AddArg(x) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool { - v_0 := v.Args[0] - // match: (Ctz8NonZero x) - // cond: buildcfg.GOAMD64 >= 3 - // result: (TZCNTL x) - for { - x := v_0 - if !(buildcfg.GOAMD64 >= 3) { - break - } - v.reset(OpAMD64TZCNTL) - v.AddArg(x) - return true - } - // match: (Ctz8NonZero x) - // cond: buildcfg.GOAMD64 < 3 - // result: (BSFL x) - for { - x := v_0 - if !(buildcfg.GOAMD64 < 3) { - break - } - v.reset(OpAMD64BSFL) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x16(v *Value) bool { - v_0 := v.Args[0] - // match: (DiffWithCeilWithPrecisionFloat32x16 [a] x) - // result: (VREDUCEPS512 [a+2] x) + // match: (AtomicAdd32 ptr val mem) + // result: (AddTupleFirst32 val (XADDLlock val ptr mem)) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS512) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64AddTupleFirst32) + v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem)) + v0.AddArg3(val, ptr, mem) + v.AddArg2(val, v0) return true } } -func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithCeilWithPrecisionFloat32x4 [a] x) - // result: (VREDUCEPS128 [a+2] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (AtomicAdd64 ptr val mem) + // result: (AddTupleFirst64 val (XADDQlock val ptr mem)) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS128) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64AddTupleFirst64) + v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem)) + v0.AddArg3(val, ptr, mem) + v.AddArg2(val, v0) return true } } -func 
rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithCeilWithPrecisionFloat32x8 [a] x) - // result: (VREDUCEPS256 [a+2] x) + // match: (AtomicAnd32 ptr val mem) + // result: (ANDLlock ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS256) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ANDLlock) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAtomicAnd32value(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithCeilWithPrecisionFloat64x2 [a] x) - // result: (VREDUCEPD128 [a+2] x) + // match: (AtomicAnd32value ptr val mem) + // result: (LoweredAtomicAnd32 ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD128) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64LoweredAtomicAnd32) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicAnd64value(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithCeilWithPrecisionFloat64x4 [a] x) - // result: (VREDUCEPD256 [a+2] x) + // match: (AtomicAnd64value ptr val mem) + // result: (LoweredAtomicAnd64 ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD256) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64LoweredAtomicAnd64) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithCeilWithPrecisionFloat64x8 [a] x) - // result: (VREDUCEPD512 [a+2] x) + // match: (AtomicAnd8 ptr val mem) + // result: (ANDBlock ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD512) - v.AuxInt = int8ToAuxInt(a + 2) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ANDBlock) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAtomicCompareAndSwap32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithFloorWithPrecisionFloat32x16 [a] x) - // result: (VREDUCEPS512 [a+1] x) + // match: (AtomicCompareAndSwap32 ptr old new_ mem) + // result: (CMPXCHGLlock ptr old new_ mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS512) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + ptr := v_0 + old := v_1 + new_ := v_2 + mem := v_3 + v.reset(OpAMD64CMPXCHGLlock) + v.AddArg4(ptr, old, new_, mem) return true } } -func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicCompareAndSwap64(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithFloorWithPrecisionFloat32x4 [a] x) - // result: (VREDUCEPS128 [a+1] x) + // match: (AtomicCompareAndSwap64 ptr old new_ mem) + // result: (CMPXCHGQlock ptr old new_ mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - 
v.reset(OpAMD64VREDUCEPS128) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + ptr := v_0 + old := v_1 + new_ := v_2 + mem := v_3 + v.reset(OpAMD64CMPXCHGQlock) + v.AddArg4(ptr, old, new_, mem) return true } } -func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicExchange32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithFloorWithPrecisionFloat32x8 [a] x) - // result: (VREDUCEPS256 [a+1] x) + // match: (AtomicExchange32 ptr val mem) + // result: (XCHGL val ptr mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS256) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64XCHGL) + v.AddArg3(val, ptr, mem) return true } } -func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAtomicExchange64(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithFloorWithPrecisionFloat64x2 [a] x) - // result: (VREDUCEPD128 [a+1] x) + // match: (AtomicExchange64 ptr val mem) + // result: (XCHGQ val ptr mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD128) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64XCHGQ) + v.AddArg3(val, ptr, mem) return true } } -func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicExchange8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithFloorWithPrecisionFloat64x4 [a] x) - // result: (VREDUCEPD256 [a+1] x) + // match: (AtomicExchange8 ptr val mem) + // result: (XCHGB val ptr mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD256) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64XCHGB) + v.AddArg3(val, ptr, mem) return true } } -func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicLoad32(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithFloorWithPrecisionFloat64x8 [a] x) - // result: (VREDUCEPD512 [a+1] x) + // match: (AtomicLoad32 ptr mem) + // result: (MOVLatomicload ptr mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD512) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + ptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVLatomicload) + v.AddArg2(ptr, mem) return true } } -func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAtomicLoad64(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithRoundWithPrecisionFloat32x16 [a] x) - // result: (VREDUCEPS512 [a+0] x) + // match: (AtomicLoad64 ptr mem) + // result: (MOVQatomicload ptr mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS512) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + ptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVQatomicload) + v.AddArg2(ptr, mem) return true } } -func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicLoad8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithRoundWithPrecisionFloat32x4 [a] x) - // result: (VREDUCEPS128 [a+0] x) + // match: (AtomicLoad8 ptr mem) + // result: (MOVBatomicload ptr mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS128) - v.AuxInt = int8ToAuxInt(a + 0) - 
v.AddArg(x) + ptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVBatomicload) + v.AddArg2(ptr, mem) return true } } -func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithRoundWithPrecisionFloat32x8 [a] x) - // result: (VREDUCEPS256 [a+0] x) + // match: (AtomicLoadPtr ptr mem) + // result: (MOVQatomicload ptr mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS256) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + ptr := v_0 + mem := v_1 + v.reset(OpAMD64MOVQatomicload) + v.AddArg2(ptr, mem) return true } } -func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAtomicOr32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithRoundWithPrecisionFloat64x2 [a] x) - // result: (VREDUCEPD128 [a+0] x) + // match: (AtomicOr32 ptr val mem) + // result: (ORLlock ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD128) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ORLlock) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicOr32value(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithRoundWithPrecisionFloat64x4 [a] x) - // result: (VREDUCEPD256 [a+0] x) + // match: (AtomicOr32value ptr val mem) + // result: (LoweredAtomicOr32 ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD256) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64LoweredAtomicOr32) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicOr64value(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithRoundWithPrecisionFloat64x8 [a] x) - // result: (VREDUCEPD512 [a+0] x) + // match: (AtomicOr64value ptr val mem) + // result: (LoweredAtomicOr64 ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD512) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64LoweredAtomicOr64) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpAtomicOr8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithTruncWithPrecisionFloat32x16 [a] x) - // result: (VREDUCEPS512 [a+3] x) + // match: (AtomicOr8 ptr val mem) + // result: (ORBlock ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS512) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpAMD64ORBlock) + v.AddArg3(ptr, val, mem) return true } } -func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicStore32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithTruncWithPrecisionFloat32x4 [a] x) - // result: (VREDUCEPS128 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (AtomicStore32 ptr val mem) + // result: (Select1 (XCHGL val ptr mem)) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - 
v.reset(OpAMD64VREDUCEPS128) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem)) + v0.AddArg3(val, ptr, mem) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpAtomicStore64(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithTruncWithPrecisionFloat32x8 [a] x) - // result: (VREDUCEPS256 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (AtomicStore64 ptr val mem) + // result: (Select1 (XCHGQ val ptr mem)) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPS256) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem)) + v0.AddArg3(val, ptr, mem) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpAtomicStore8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithTruncWithPrecisionFloat64x2 [a] x) - // result: (VREDUCEPD128 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (AtomicStore8 ptr val mem) + // result: (Select1 (XCHGB val ptr mem)) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD128) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64XCHGB, types.NewTuple(typ.UInt8, types.TypeMem)) + v0.AddArg3(val, ptr, mem) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithTruncWithPrecisionFloat64x4 [a] x) - // result: (VREDUCEPD256 [a+3] x) + b := v.Block + typ := &b.Func.Config.Types + // match: (AtomicStorePtrNoWB ptr val mem) + // result: (Select1 (XCHGQ val ptr mem)) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - v.reset(OpAMD64VREDUCEPD256) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + ptr := v_0 + val := v_1 + mem := v_2 + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem)) + v0.AddArg3(val, ptr, mem) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpAverageMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (DiffWithTruncWithPrecisionFloat64x8 [a] x) - // result: (VREDUCEPD512 [a+3] x) + b := v.Block + // match: (AverageMaskedUint16x16 x y mask) + // result: (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VREDUCEPD512) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPAVGWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpDiv16(v *Value) bool { +func rewriteValueAMD64_OpAverageMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Div16 [a] x y) - // result: (Select0 (DIVW [a] x y)) + // match: (AverageMaskedUint16x32 x y mask) + // 
result: (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) for { - a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPAVGWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpDiv16u(v *Value) bool { +func rewriteValueAMD64_OpAverageMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Div16u x y) - // result: (Select0 (DIVWU x y)) + // match: (AverageMaskedUint16x8 x y mask) + // result: (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPAVGWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpDiv32(v *Value) bool { +func rewriteValueAMD64_OpAverageMaskedUint8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Div32 [a] x y) - // result: (Select0 (DIVL [a] x y)) + // match: (AverageMaskedUint8x16 x y mask) + // result: (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) for { - a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPAVGBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpDiv32u(v *Value) bool { +func rewriteValueAMD64_OpAverageMaskedUint8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Div32u x y) - // result: (Select0 (DIVLU x y)) + // match: (AverageMaskedUint8x32 x y mask) + // result: (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32)) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPAVGBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpDiv64(v *Value) bool { +func rewriteValueAMD64_OpAverageMaskedUint8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Div64 [a] x y) - // result: (Select0 (DIVQ [a] x y)) + // match: (AverageMaskedUint8x64 x y mask) + // result: (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) for { - a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPAVGBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpDiv64u(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpBitLen16(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := 
&b.Func.Config.Types - // match: (Div64u x y) - // result: (Select0 (DIVQU x y)) + // match: (BitLen16 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSRL (LEAL1 [1] (MOVWQZX x) (MOVWQZX x))) for { x := v_0 - y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64)) - v0.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSRL) + v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, v1) + v.AddArg(v0) + return true + } + // match: (BitLen16 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (NEGQ (ADDQconst [-32] (LZCNTL (MOVWQZX x)))) + for { + t := v.Type + x := v_0 + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64NEGQ) + v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) + v0.AuxInt = int32ToAuxInt(-32) + v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, x.Type) + v2.AddArg(x) + v1.AddArg(v2) + v0.AddArg(v1) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpDiv8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpBitLen32(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Div8 x y) - // result: (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y))) + // match: (BitLen32 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (Select0 (BSRQ (LEAQ1 [1] (MOVLQZX x) (MOVLQZX x)))) for { x := v_0 - y := v_1 + if !(buildcfg.GOAMD64 < 3) { + break + } v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) - v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) - v2.AddArg(y) - v0.AddArg2(v1, v2) + v0 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1 := b.NewValue0(v.Pos, OpAMD64LEAQ1, typ.UInt64) + v1.AuxInt = int32ToAuxInt(1) + v2 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64) + v2.AddArg(x) + v1.AddArg2(v2, v2) + v0.AddArg(v1) v.AddArg(v0) return true } -} -func rewriteValueAMD64_OpDiv8u(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Div8u x y) - // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) + // match: (BitLen32 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (NEGQ (ADDQconst [-32] (LZCNTL x))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) - v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64NEGQ) + v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) + v0.AuxInt = int32ToAuxInt(-32) + v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) - v2.AddArg(y) - v0.AddArg2(v1, v2) + v0.AddArg(v1) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpBitLen64(v *Value) bool { v_0 := v.Args[0] - // match: (DotProdBroadcastFloat64x2 x y) - // result: (VDPPD128 [127] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (BitLen64 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (ADDQconst [1] (CMOVQEQ (Select0 (BSRQ x)) (MOVQconst [-1]) (Select1 (BSRQ x)))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VDPPD128) - v.AuxInt = int8ToAuxInt(127) - 
v.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64ADDQconst) + v.AuxInt = int32ToAuxInt(1) + v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t) + v1 := b.NewValue0(v.Pos, OpSelect0, t) + v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v2.AddArg(x) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t) + v3.AuxInt = int64ToAuxInt(-1) + v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4.AddArg(v2) + v0.AddArg3(v1, v3, v4) + v.AddArg(v0) return true } -} -func rewriteValueAMD64_OpEq16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Eq16 x y) - // result: (SETEQ (CMPW x y)) + // match: (BitLen64 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (NEGQ (ADDQconst [-64] (LZCNTQ x))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64NEGQ) + v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) + v0.AuxInt = int32ToAuxInt(-64) + v1 := b.NewValue0(v.Pos, OpAMD64LZCNTQ, typ.UInt64) + v1.AddArg(x) + v0.AddArg(v1) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpEq32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpBitLen8(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (Eq32 x y) - // result: (SETEQ (CMPL x y)) + typ := &b.Func.Config.Types + // match: (BitLen8 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSRL (LEAL1 [1] (MOVBQZX x) (MOVBQZX x))) for { x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSRL) + v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, v1) v.AddArg(v0) return true } -} -func rewriteValueAMD64_OpEq32F(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Eq32F x y) - // result: (SETEQF (UCOMISS x y)) + // match: (BitLen8 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (NEGQ (ADDQconst [-32] (LZCNTL (MOVBQZX x)))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64SETEQF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) - v0.AddArg2(x, y) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64NEGQ) + v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t) + v0.AuxInt = int32ToAuxInt(-32) + v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, x.Type) + v2.AddArg(x) + v1.AddArg(v2) + v0.AddArg(v1) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpEq64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpBswap16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Eq64 x y) - // result: (SETEQ (CMPQ x y)) + // match: (Bswap16 x) + // result: (ROLWconst [8] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64ROLWconst) + v.AuxInt = int8ToAuxInt(8) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEq64F(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeil(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Eq64F x y) - // result: (SETEQF (UCOMISD x y)) + // match: (Ceil x) + // result: (ROUNDSD [2] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64SETEQF) - v0 := b.NewValue0(v.Pos, 
OpAMD64UCOMISD, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64ROUNDSD) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEq8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Eq8 x y) - // result: (SETEQ (CMPB x y)) + // match: (CeilFloat32x4 x) + // result: (VROUNDPS128 [2] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqB(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (EqB x y) - // result: (SETEQ (CMPB x y)) + // match: (CeilFloat32x8 x) + // result: (VROUNDPS256 [2] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqPtr(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (EqPtr x y) - // result: (SETEQ (CMPQ x y)) + // match: (CeilFloat64x2 x) + // result: (VROUNDPD128 [2] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64SETEQ) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqualFloat32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) + // match: (CeilFloat64x4 x) + // result: (VROUNDPD256 [2] x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = int8ToAuxInt(2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqualFloat32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilWithPrecisionFloat32x16(v *Value) bool { v_0 := v.Args[0] - // match: (EqualFloat32x4 x y) - // result: (VCMPPS128 [0] x y) + // match: (CeilWithPrecisionFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = int8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqualFloat32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilWithPrecisionFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (EqualFloat32x8 x y) - // result: (VCMPPS256 [0] x y) + // match: (CeilWithPrecisionFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = int8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqualFloat64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilWithPrecisionFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (EqualFloat64x2 
x y) - // result: (VCMPPD128 [0] x y) + // match: (CeilWithPrecisionFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = int8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqualFloat64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilWithPrecisionFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (EqualFloat64x4 x y) - // result: (VCMPPD256 [0] x y) + // match: (CeilWithPrecisionFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = int8ToAuxInt(0) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqualFloat64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [0] x y)) + // match: (CeilWithPrecisionFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [0] x y)) + // match: (CeilWithPrecisionFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (EqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [0] x y)) + // match: (CeilWithPrecisionMaskedFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (EqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [0] x y)) + // match: (CeilWithPrecisionMaskedFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := 
v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (EqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [0] x y)) + // match: (CeilWithPrecisionMaskedFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpEqualUint16x16(v *Value) bool { +func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [0] x y)) + // match: (CeilWithPrecisionMaskedFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [0] x y)) + // match: (CeilWithPrecisionMaskedFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpEqualUint16x8(v *Value) bool { +func rewriteValueAMD64_OpCeilWithPrecisionMaskedFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [0] x y)) + // match: (CeilWithPrecisionMaskedFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + 
v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpCondSelect(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (EqualUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [0] x y)) + // match: (CondSelect x y (SETEQ cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQEQ y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETEQ { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQEQ) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [0] x y)) + // match: (CondSelect x y (SETNE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNE y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETNE { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQNE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [0] x y)) + // match: (CondSelect x y (SETL cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQLT y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETL { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQLT) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y)) + // match: (CondSelect x y (SETG cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQGT y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETG { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQGT) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y)) + // match: (CondSelect x y (SETLE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQLE y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - 
v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETLE { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQLE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y)) + // match: (CondSelect x y (SETGE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQGE y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETGE { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQGE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpEqualUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [0] x y)) + // match: (CondSelect x y (SETA cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQHI y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETA { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQHI) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpEqualUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [0] x y)) + // match: (CondSelect x y (SETB cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQCS y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETB { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQCS) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [0] x y)) + // match: (CondSelect x y (SETAE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQCC y x cond) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) + if v_2.Op != OpAMD64SETAE { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQCC) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFMA(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (FMA x y z) - // result: (VFMADD231SD z x y) + // match: (CondSelect x y (SETBE cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQLS y x cond) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - v.reset(OpAMD64VFMADD231SD) - v.AddArg3(z, x, y) + if v_2.Op != OpAMD64SETBE { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } 
+ v.reset(OpAMD64CMOVQLS) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloor(v *Value) bool { - v_0 := v.Args[0] - // match: (Floor x) - // result: (ROUNDSD [1] x) + // match: (CondSelect x y (SETEQF cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQEQF y x cond) for { + t := v.Type x := v_0 - v.reset(OpAMD64ROUNDSD) - v.AuxInt = int8ToAuxInt(1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETEQF { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQEQF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorFloat32x4 x) - // result: (VROUNDPS128 [1] x) + // match: (CondSelect x y (SETNEF cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNEF y x cond) for { + t := v.Type x := v_0 - v.reset(OpAMD64VROUNDPS128) - v.AuxInt = int8ToAuxInt(1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETNEF { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQNEF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorFloat32x8 x) - // result: (VROUNDPS256 [1] x) + // match: (CondSelect x y (SETGF cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQGTF y x cond) for { + t := v.Type x := v_0 - v.reset(OpAMD64VROUNDPS256) - v.AuxInt = int8ToAuxInt(1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETGF { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQGTF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorFloat64x2 x) - // result: (VROUNDPD128 [1] x) + // match: (CondSelect x y (SETGEF cond)) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (CMOVQGEF y x cond) for { + t := v.Type x := v_0 - v.reset(OpAMD64VROUNDPD128) - v.AuxInt = int8ToAuxInt(1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETGEF { + break + } + cond := v_2.Args[0] + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64CMOVQGEF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorFloat64x4 x) - // result: (VROUNDPD256 [1] x) + // match: (CondSelect x y (SETEQ cond)) + // cond: is32BitInt(t) + // result: (CMOVLEQ y x cond) for { + t := v.Type x := v_0 - v.reset(OpAMD64VROUNDPD256) - v.AuxInt = int8ToAuxInt(1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETEQ { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLEQ) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorWithPrecisionFloat32x16(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorWithPrecisionFloat32x16 [a] x) - // result: (VRNDSCALEPS512 [a+1] x) + // match: (CondSelect x y (SETNE cond)) + // cond: is32BitInt(t) + // result: (CMOVLNE y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VRNDSCALEPS512) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETNE { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLNE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorWithPrecisionFloat32x4(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorWithPrecisionFloat32x4 [a] x) - // result: (VRNDSCALEPS128 [a+1] x) + // match: (CondSelect x 
y (SETL cond)) + // cond: is32BitInt(t) + // result: (CMOVLLT y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VRNDSCALEPS128) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETL { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLLT) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorWithPrecisionFloat32x8(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorWithPrecisionFloat32x8 [a] x) - // result: (VRNDSCALEPS256 [a+1] x) + // match: (CondSelect x y (SETG cond)) + // cond: is32BitInt(t) + // result: (CMOVLGT y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VRNDSCALEPS256) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETG { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLGT) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorWithPrecisionFloat64x2(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorWithPrecisionFloat64x2 [a] x) - // result: (VRNDSCALEPD128 [a+1] x) + // match: (CondSelect x y (SETLE cond)) + // cond: is32BitInt(t) + // result: (CMOVLLE y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VRNDSCALEPD128) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETLE { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLLE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorWithPrecisionFloat64x4 [a] x) - // result: (VRNDSCALEPD256 [a+1] x) + // match: (CondSelect x y (SETGE cond)) + // cond: is32BitInt(t) + // result: (CMOVLGE y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VRNDSCALEPD256) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETGE { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLGE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v *Value) bool { - v_0 := v.Args[0] - // match: (FloorWithPrecisionFloat64x8 [a] x) - // result: (VRNDSCALEPD512 [a+1] x) + // match: (CondSelect x y (SETA cond)) + // cond: is32BitInt(t) + // result: (CMOVLHI y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VRNDSCALEPD512) - v.AuxInt = int8ToAuxInt(a + 1) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETA { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLHI) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GaloisFieldAffineTransformInversedUint8x16 [a] x y) - // result: (VGF2P8AFFINEINVQB128 [a] x y) + // match: (CondSelect x y (SETB cond)) + // cond: is32BitInt(t) + // result: (CMOVLCS y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VGF2P8AFFINEINVQB128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + if v_2.Op != OpAMD64SETB { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLCS) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: 
(GaloisFieldAffineTransformInversedUint8x32 [a] x y) - // result: (VGF2P8AFFINEINVQB256 [a] x y) + // match: (CondSelect x y (SETAE cond)) + // cond: is32BitInt(t) + // result: (CMOVLCC y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VGF2P8AFFINEINVQB256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + if v_2.Op != OpAMD64SETAE { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLCC) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GaloisFieldAffineTransformInversedUint8x64 [a] x y) - // result: (VGF2P8AFFINEINVQB512 [a] x y) + // match: (CondSelect x y (SETBE cond)) + // cond: is32BitInt(t) + // result: (CMOVLLS y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VGF2P8AFFINEINVQB512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + if v_2.Op != OpAMD64SETBE { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLLS) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GaloisFieldAffineTransformUint8x16 [a] x y) - // result: (VGF2P8AFFINEQB128 [a] x y) + // match: (CondSelect x y (SETEQF cond)) + // cond: is32BitInt(t) + // result: (CMOVLEQF y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VGF2P8AFFINEQB128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + if v_2.Op != OpAMD64SETEQF { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLEQF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GaloisFieldAffineTransformUint8x32 [a] x y) - // result: (VGF2P8AFFINEQB256 [a] x y) + // match: (CondSelect x y (SETNEF cond)) + // cond: is32BitInt(t) + // result: (CMOVLNEF y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VGF2P8AFFINEQB256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + if v_2.Op != OpAMD64SETNEF { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLNEF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GaloisFieldAffineTransformUint8x64 [a] x y) - // result: (VGF2P8AFFINEQB512 [a] x y) + // match: (CondSelect x y (SETGF cond)) + // cond: is32BitInt(t) + // result: (CMOVLGTF y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VGF2P8AFFINEQB512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + if v_2.Op != OpAMD64SETGF { + break + } + cond := v_2.Args[0] + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLGTF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Float32x8(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Float32x8 [a] x) - // result: (VEXTRACTF128128 [a] x) + // match: (CondSelect x y (SETGEF cond)) + // cond: is32BitInt(t) + // result: (CMOVLGEF y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTF128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETGEF { + break + } + cond := v_2.Args[0] + if 
!(is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLGEF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Float64x4(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Float64x4 [a] x) - // result: (VEXTRACTF128128 [a] x) + // match: (CondSelect x y (SETEQ cond)) + // cond: is16BitInt(t) + // result: (CMOVWEQ y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTF128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETEQ { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWEQ) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Int16x16(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Int16x16 [a] x) - // result: (VEXTRACTI128128 [a] x) + // match: (CondSelect x y (SETNE cond)) + // cond: is16BitInt(t) + // result: (CMOVWNE y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETNE { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWNE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Int32x8(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Int32x8 [a] x) - // result: (VEXTRACTI128128 [a] x) + // match: (CondSelect x y (SETL cond)) + // cond: is16BitInt(t) + // result: (CMOVWLT y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETL { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWLT) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Int64x4(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Int64x4 [a] x) - // result: (VEXTRACTI128128 [a] x) + // match: (CondSelect x y (SETG cond)) + // cond: is16BitInt(t) + // result: (CMOVWGT y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETG { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWGT) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Int8x32(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Int8x32 [a] x) - // result: (VEXTRACTI128128 [a] x) + // match: (CondSelect x y (SETLE cond)) + // cond: is16BitInt(t) + // result: (CMOVWLE y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETLE { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWLE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Uint16x16(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Uint16x16 [a] x) - // result: (VEXTRACTI128128 [a] x) + // match: (CondSelect x y (SETGE cond)) + // cond: is16BitInt(t) + // result: (CMOVWGE y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETGE { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWGE) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Uint32x8(v *Value) bool { - v_0 := v.Args[0] - // match: 
(Get128Uint32x8 [a] x) - // result: (VEXTRACTI128128 [a] x) + // match: (CondSelect x y (SETA cond)) + // cond: is16BitInt(t) + // result: (CMOVWHI y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETA { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWHI) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Uint64x4(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Uint64x4 [a] x) - // result: (VEXTRACTI128128 [a] x) + // match: (CondSelect x y (SETB cond)) + // cond: is16BitInt(t) + // result: (CMOVWCS y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETB { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWCS) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGet128Uint8x32(v *Value) bool { - v_0 := v.Args[0] - // match: (Get128Uint8x32 [a] x) - // result: (VEXTRACTI128128 [a] x) + // match: (CondSelect x y (SETAE cond)) + // cond: is16BitInt(t) + // result: (CMOVWCC y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VEXTRACTI128128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETAE { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWCC) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool { - v_0 := v.Args[0] - // match: (GetElemInt16x8 [a] x) - // result: (VPEXTRW128 [a] x) + // match: (CondSelect x y (SETBE cond)) + // cond: is16BitInt(t) + // result: (CMOVWLS y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VPEXTRW128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETBE { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWLS) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGetElemInt32x4(v *Value) bool { - v_0 := v.Args[0] - // match: (GetElemInt32x4 [a] x) - // result: (VPEXTRD128 [a] x) + // match: (CondSelect x y (SETEQF cond)) + // cond: is16BitInt(t) + // result: (CMOVWEQF y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VPEXTRD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETEQF { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWEQF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGetElemInt64x2(v *Value) bool { - v_0 := v.Args[0] - // match: (GetElemInt64x2 [a] x) - // result: (VPEXTRQ128 [a] x) + // match: (CondSelect x y (SETNEF cond)) + // cond: is16BitInt(t) + // result: (CMOVWNEF y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VPEXTRQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETNEF { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWNEF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGetElemInt8x16(v *Value) bool { - v_0 := v.Args[0] - // match: (GetElemInt8x16 [a] x) - // result: (VPEXTRB128 [a] x) + // match: (CondSelect x y (SETGF cond)) + // cond: is16BitInt(t) + // result: (CMOVWGTF y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x 
:= v_0 - v.reset(OpAMD64VPEXTRB128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETGF { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWGTF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGetElemUint16x8(v *Value) bool { - v_0 := v.Args[0] - // match: (GetElemUint16x8 [a] x) - // result: (VPEXTRW128 [a] x) + // match: (CondSelect x y (SETGEF cond)) + // cond: is16BitInt(t) + // result: (CMOVWGEF y x cond) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VPEXTRW128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + if v_2.Op != OpAMD64SETGEF { + break + } + cond := v_2.Args[0] + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWGEF) + v.AddArg3(y, x, cond) return true } -} -func rewriteValueAMD64_OpGetElemUint32x4(v *Value) bool { - v_0 := v.Args[0] - // match: (GetElemUint32x4 [a] x) - // result: (VPEXTRD128 [a] x) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 1 + // result: (CondSelect x y (MOVBQZX check)) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VPEXTRD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 1) { + break + } + v.reset(OpCondSelect) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64) + v0.AddArg(check) + v.AddArg3(x, y, v0) return true } -} -func rewriteValueAMD64_OpGetElemUint64x2(v *Value) bool { - v_0 := v.Args[0] - // match: (GetElemUint64x2 [a] x) - // result: (VPEXTRQ128 [a] x) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 2 + // result: (CondSelect x y (MOVWQZX check)) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VPEXTRQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 2) { + break + } + v.reset(OpCondSelect) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64) + v0.AddArg(check) + v.AddArg3(x, y, v0) return true } -} -func rewriteValueAMD64_OpGetElemUint8x16(v *Value) bool { - v_0 := v.Args[0] - // match: (GetElemUint8x16 [a] x) - // result: (VPEXTRB128 [a] x) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 4 + // result: (CondSelect x y (MOVLQZX check)) for { - a := auxIntToInt8(v.AuxInt) + t := v.Type x := v_0 - v.reset(OpAMD64VPEXTRB128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 4) { + break + } + v.reset(OpCondSelect) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64) + v0.AddArg(check) + v.AddArg3(x, y, v0) return true } -} -func rewriteValueAMD64_OpGetG(v *Value) bool { - v_0 := v.Args[0] - // match: (GetG mem) - // cond: v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal - // result: (LoweredGetG mem) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t)) + // result: (CMOVQNE y x (CMPQconst [0] check)) for { - mem := v_0 - if !(v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal) { + t := v.Type + x := v_0 + y := v_1 + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) { break } - v.reset(OpAMD64LoweredGetG) - v.AddArg(mem) + v.reset(OpAMD64CMOVQNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + 
v0.AddArg(check) + v.AddArg3(y, x, v0) return true } - return false -} -func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [13] x y)) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t) + // result: (CMOVLNE y x (CMPQconst [0] check)) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) { + break + } + v.reset(OpAMD64CMOVLNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } -} -func rewriteValueAMD64_OpGreaterEqualFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GreaterEqualFloat32x4 x y) - // result: (VCMPPS128 [13] x y) + // match: (CondSelect x y check) + // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t) + // result: (CMOVWNE y x (CMPQconst [0] check)) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = int8ToAuxInt(13) - v.AddArg2(x, y) + check := v_2 + if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) { + break + } + v.reset(OpAMD64CMOVWNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v0.AddArg(check) + v.AddArg3(y, x, v0) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GreaterEqualFloat32x8 x y) - // result: (VCMPPS256 [13] x y) +func rewriteValueAMD64_OpConst16(v *Value) bool { + // match: (Const16 [c]) + // result: (MOVLconst [int32(c)]) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = int8ToAuxInt(13) - v.AddArg2(x, y) + c := auxIntToInt16(v.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(int32(c)) return true } } -func rewriteValueAMD64_OpGreaterEqualFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GreaterEqualFloat64x2 x y) - // result: (VCMPPD128 [13] x y) +func rewriteValueAMD64_OpConst8(v *Value) bool { + // match: (Const8 [c]) + // result: (MOVLconst [int32(c)]) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = int8ToAuxInt(13) - v.AddArg2(x, y) + c := auxIntToInt8(v.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(int32(c)) return true } } -func rewriteValueAMD64_OpGreaterEqualFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (GreaterEqualFloat64x4 x y) - // result: (VCMPPD256 [13] x y) +func rewriteValueAMD64_OpConstBool(v *Value) bool { + // match: (ConstBool [c]) + // result: (MOVLconst [b2i32(c)]) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = int8ToAuxInt(13) - v.AddArg2(x, y) + c := auxIntToBool(v.AuxInt) + v.reset(OpAMD64MOVLconst) + v.AuxInt = int32ToAuxInt(b2i32(c)) return true } } -func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [13] x y)) +func rewriteValueAMD64_OpConstNil(v *Value) bool { + // match: (ConstNil ) + // result: (MOVQconst [0]) 
for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64ToAuxInt(0) return true } } -func rewriteValueAMD64_OpGreaterEqualInt16x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz16(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (GreaterEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [13] x y)) + // match: (Ctz16 x) + // result: (BSFL (ORLconst [1<<16] x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) + v.reset(OpAMD64BSFL) + v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1 << 16) + v0.AddArg(x) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz16NonZero(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [13] x y)) + // match: (Ctz16NonZero x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTL) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpGreaterEqualInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [13] x y)) + // match: (Ctz16NonZero x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSFL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSFL) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz32(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (GreaterEqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [13] x y)) + // match: (Ctz32 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTL) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpGreaterEqualInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [13] x y)) + // match: (Ctz32 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (Select0 (BSFQ (BTSQconst [32] x))) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1 := b.NewValue0(v.Pos, OpAMD64BTSQconst, typ.UInt64) + 
v1.AuxInt = int8ToAuxInt(32) + v1.AddArg(x) + v0.AddArg(v1) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualInt32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz32NonZero(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [13] x y)) + // match: (Ctz32NonZero x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTL) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpGreaterEqualInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [13] x y)) + // match: (Ctz32NonZero x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSFL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSFL) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualInt64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz64(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (GreaterEqualInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [13] x y)) + // match: (Ctz64 x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTQ x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTQ) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [13] x y)) + // match: (Ctz64 x) + // cond: buildcfg.GOAMD64 < 3 + // result: (CMOVQEQ (Select0 (BSFQ x)) (MOVQconst [64]) (Select1 (BSFQ x))) for { + t := v.Type x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64CMOVQEQ) + v0 := b.NewValue0(v.Pos, OpSelect0, t) + v1 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1.AddArg(x) + v0.AddArg(v1) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t) + v2.AuxInt = int64ToAuxInt(64) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v3.AddArg(v1) + v.AddArg3(v0, v2, v3) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualInt8x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz64NonZero(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (GreaterEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [13] x y)) + // match: (Ctz64NonZero x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTQ x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) 
+ if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTQ) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpGreaterEqualInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [13] x y)) + // match: (Ctz64NonZero x) + // cond: buildcfg.GOAMD64 < 3 + // result: (Select0 (BSFQ x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v0.AddArg(x) v.AddArg(v0) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz8(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (GreaterEqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [13] x y)) + // match: (Ctz8 x) + // result: (BSFL (ORLconst [1<<8 ] x)) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) + v.reset(OpAMD64BSFL) + v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32) + v0.AuxInt = int32ToAuxInt(1 << 8) + v0.AddArg(x) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpGreaterEqualUint16x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [13] x y)) + // match: (Ctz8NonZero x) + // cond: buildcfg.GOAMD64 >= 3 + // result: (TZCNTL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 >= 3) { + break + } + v.reset(OpAMD64TZCNTL) + v.AddArg(x) return true } -} -func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) + // match: (Ctz8NonZero x) + // cond: buildcfg.GOAMD64 < 3 + // result: (BSFL x) for { x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + if !(buildcfg.GOAMD64 < 3) { + break + } + v.reset(OpAMD64BSFL) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpGreaterEqualUint16x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [13] x y)) + // match: (DiffWithCeilWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool { - v_1 := v.Args[1] +func 
rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) + // match: (DiffWithCeilWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [13] x y)) + // match: (DiffWithCeilWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [13] x y)) + // match: (DiffWithCeilWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y)) + // match: (DiffWithCeilWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y)) + // match: (DiffWithCeilWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+2] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 2) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool { +func 
rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) + // match: (DiffWithCeilWithPrecisionMaskedFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterEqualUint8x16(v *Value) bool { +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [13] x y)) + // match: (DiffWithCeilWithPrecisionMaskedFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterEqualUint8x32(v *Value) bool { +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [13] x y)) + // match: (DiffWithCeilWithPrecisionMaskedFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool { +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [13] x y)) + // match: (DiffWithCeilWithPrecisionMaskedFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := 
v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [14] x y)) + // match: (DiffWithCeilWithPrecisionMaskedFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpDiffWithCeilWithPrecisionMaskedFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (GreaterFloat32x4 x y) - // result: (VCMPPS128 [14] x y) + b := v.Block + // match: (DiffWithCeilWithPrecisionMaskedFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = int8ToAuxInt(14) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 2) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterFloat32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v *Value) bool { v_0 := v.Args[0] - // match: (GreaterFloat32x8 x y) - // result: (VCMPPS256 [14] x y) + // match: (DiffWithFloorWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = int8ToAuxInt(14) - v.AddArg2(x, y) + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterFloat64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (GreaterFloat64x2 x y) - // result: (VCMPPD128 [14] x y) + // match: (DiffWithFloorWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = int8ToAuxInt(14) - v.AddArg2(x, y) + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterFloat64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (GreaterFloat64x4 x y) - // result: (VCMPPD256 [14] x y) + // match: (DiffWithFloorWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = int8ToAuxInt(14) - v.AddArg2(x, y) + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterFloat64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [14] x y)) + // match: (DiffWithFloorWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y 
:= v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [14] x y)) + // match: (DiffWithFloorWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [14] x y)) + // match: (DiffWithFloorWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterInt64x2(v *Value) bool { +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [14] x y)) + // match: (DiffWithFloorWithPrecisionMaskedFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool { +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [14] x y)) + // match: (DiffWithFloorWithPrecisionMaskedFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool { +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := 
v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [14] x y)) + // match: (DiffWithFloorWithPrecisionMaskedFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterUint16x16(v *Value) bool { +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [14] x y)) + // match: (DiffWithFloorWithPrecisionMaskedFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [14] x y)) + // match: (DiffWithFloorWithPrecisionMaskedFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterUint16x8(v *Value) bool { +func rewriteValueAMD64_OpDiffWithFloorWithPrecisionMaskedFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [14] x y)) + // match: (DiffWithFloorWithPrecisionMaskedFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [14] x y)) + 
// match: (DiffWithRoundWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterUint32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [14] x y)) + // match: (DiffWithRoundWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterUint32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [14] x y)) + // match: (DiffWithRoundWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterUint64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [14] x y)) + // match: (DiffWithRoundWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterUint64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [14] x y)) + // match: (DiffWithRoundWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y)) + // match: 
(DiffWithRoundWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpGreaterUint8x16(v *Value) bool { +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [14] x y)) + // match: (DiffWithRoundWithPrecisionMaskedFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterUint8x32(v *Value) bool { +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [14] x y)) + // match: (DiffWithRoundWithPrecisionMaskedFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool { +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [14] x y)) + // match: (DiffWithRoundWithPrecisionMaskedFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool { +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (HasCPUFeature {s}) - // result: (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s}))) + // match: (DiffWithRoundWithPrecisionMaskedFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) for { - s := auxToSym(v.Aux) - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v0.AuxInt = int32ToAuxInt(0) - 
v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64) - v1.Aux = symToAux(s) - v0.AddArg(v1) - v.AddArg(v0) + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpIsInBounds(v *Value) bool { +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (IsInBounds idx len) - // result: (SETB (CMPQ idx len)) + // match: (DiffWithRoundWithPrecisionMaskedFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) for { - idx := v_0 - len := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(idx, len) - v.AddArg(v0) + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpDiffWithRoundWithPrecisionMaskedFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y)) + // match: (DiffWithRoundWithPrecisionMaskedFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = int8ToAuxInt(3) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 0) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat32x4 x y) - // result: (VCMPPS128 [3] x y) + // match: (DiffWithTruncWithPrecisionFloat32x16 [a] x) + // result: (VREDUCEPS512 [a+3] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = int8ToAuxInt(3) - v.AddArg2(x, y) + v.reset(OpAMD64VREDUCEPS512) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat32x8 x y) - // result: (VCMPPS256 [3] x y) + // match: (DiffWithTruncWithPrecisionFloat32x4 [a] x) + // result: (VREDUCEPS128 [a+3] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = int8ToAuxInt(3) - v.AddArg2(x, y) + v.reset(OpAMD64VREDUCEPS128) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat64x2 x y) - // result: (VCMPPD128 [3] x y) + // match: (DiffWithTruncWithPrecisionFloat32x8 [a] x) + // result: (VREDUCEPS256 [a+3] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = 
int8ToAuxInt(3) - v.AddArg2(x, y) + v.reset(OpAMD64VREDUCEPS256) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (IsNanFloat64x4 x y) - // result: (VCMPPD256 [3] x y) + // match: (DiffWithTruncWithPrecisionFloat64x2 [a] x) + // result: (VREDUCEPD128 [a+3] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = int8ToAuxInt(3) - v.AddArg2(x, y) + v.reset(OpAMD64VREDUCEPD128) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (IsNanFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) + // match: (DiffWithTruncWithPrecisionFloat64x4 [a] x) + // result: (VREDUCEPD256 [a+3] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(3) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VREDUCEPD256) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpIsNonNil(v *Value) bool { +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (IsNonNil p) - // result: (SETNE (TESTQ p p)) + // match: (DiffWithTruncWithPrecisionFloat64x8 [a] x) + // result: (VREDUCEPD512 [a+3] x) for { - p := v_0 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64TESTQ, types.TypeFlags) - v0.AddArg2(p, p) - v.AddArg(v0) + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VREDUCEPD512) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool { +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (IsSliceInBounds idx len) - // result: (SETBE (CMPQ idx len)) + // match: (DiffWithTruncWithPrecisionMaskedFloat32x16 [a] x mask) + // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) for { - idx := v_0 - len := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(idx, len) - v.AddArg(v0) + a := auxIntToInt8(v.AuxInt) + x := v_0 + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpLeq16(v *Value) bool { +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq16 x y) - // result: (SETLE (CMPW x y)) + // match: (DiffWithTruncWithPrecisionMaskedFloat32x4 [a] x mask) + // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpLeq16U(v *Value) bool { 
+func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq16U x y) - // result: (SETBE (CMPW x y)) + // match: (DiffWithTruncWithPrecisionMaskedFloat32x8 [a] x mask) + // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpLeq32(v *Value) bool { +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq32 x y) - // result: (SETLE (CMPL x y)) + // match: (DiffWithTruncWithPrecisionMaskedFloat64x2 [a] x mask) + // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpLeq32F(v *Value) bool { +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq32F x y) - // result: (SETGEF (UCOMISS y x)) + // match: (DiffWithTruncWithPrecisionMaskedFloat64x4 [a] x mask) + // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64SETGEF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpLeq32U(v *Value) bool { +func rewriteValueAMD64_OpDiffWithTruncWithPrecisionMaskedFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq32U x y) - // result: (SETBE (CMPL x y)) + // match: (DiffWithTruncWithPrecisionMaskedFloat64x8 [a] x mask) + // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpLeq64(v *Value) bool { +func rewriteValueAMD64_OpDiv16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq64 x y) - // result: (SETLE (CMPQ x y)) + typ := &b.Func.Config.Types + // match: (Div16 [a] x y) + // result: (Select0 (DIVW [a] x y)) for { + a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) + v0.AuxInt = boolToAuxInt(a) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func 
rewriteValueAMD64_OpLeq64F(v *Value) bool { +func rewriteValueAMD64_OpDiv16u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq64F x y) - // result: (SETGEF (UCOMISD y x)) + typ := &b.Func.Config.Types + // match: (Div16u x y) + // result: (Select0 (DIVWU x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETGEF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) - v0.AddArg2(y, x) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLeq64U(v *Value) bool { +func rewriteValueAMD64_OpDiv32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq64U x y) - // result: (SETBE (CMPQ x y)) + typ := &b.Func.Config.Types + // match: (Div32 [a] x y) + // result: (Select0 (DIVL [a] x y)) for { + a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32)) + v0.AuxInt = boolToAuxInt(a) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLeq8(v *Value) bool { +func rewriteValueAMD64_OpDiv32u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq8 x y) - // result: (SETLE (CMPB x y)) + typ := &b.Func.Config.Types + // match: (Div32u x y) + // result: (Select0 (DIVLU x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETLE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32)) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLeq8U(v *Value) bool { +func rewriteValueAMD64_OpDiv64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Leq8U x y) - // result: (SETBE (CMPB x y)) + typ := &b.Func.Config.Types + // match: (Div64 [a] x y) + // result: (Select0 (DIVQ [a] x y)) for { + a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETBE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64)) + v0.AuxInt = boolToAuxInt(a) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLess16(v *Value) bool { +func rewriteValueAMD64_OpDiv64u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Less16 x y) - // result: (SETL (CMPW x y)) + typ := &b.Func.Config.Types + // match: (Div64u x y) + // result: (Select0 (DIVQU x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETL) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64)) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLess16U(v *Value) bool { +func rewriteValueAMD64_OpDiv8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Less16U x y) - // result: (SETB (CMPW x y)) + typ := &b.Func.Config.Types + // match: (Div8 x y) + // result: (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y))) for { x := v_0 y := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) + v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) + v2.AddArg(y) 
+ v0.AddArg2(v1, v2) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLess32(v *Value) bool { +func rewriteValueAMD64_OpDiv8u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Less32 x y) - // result: (SETL (CMPL x y)) + typ := &b.Func.Config.Types + // match: (Div8u x y) + // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) for { x := v_0 y := v_1 - v.reset(OpAMD64SETL) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) + v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + v2.AddArg(y) + v0.AddArg2(v1, v2) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLess32F(v *Value) bool { +func rewriteValueAMD64_OpDivMaskedFloat32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Less32F x y) - // result: (SETGF (UCOMISS y x)) + // match: (DivMaskedFloat32x16 x y mask) + // result: (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETGF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VDIVPSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpLess32U(v *Value) bool { +func rewriteValueAMD64_OpDivMaskedFloat32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Less32U x y) - // result: (SETB (CMPL x y)) + // match: (DivMaskedFloat32x4 x y mask) + // result: (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VDIVPSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpLess64(v *Value) bool { +func rewriteValueAMD64_OpDivMaskedFloat32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Less64 x y) - // result: (SETL (CMPQ x y)) + // match: (DivMaskedFloat32x8 x y mask) + // result: (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETL) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VDIVPSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpLess64F(v *Value) bool { +func rewriteValueAMD64_OpDivMaskedFloat64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Less64F x y) - // result: (SETGF (UCOMISD y x)) + // match: (DivMaskedFloat64x2 x y mask) + // result: (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETGF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) - v0.AddArg2(y, x) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VDIVPDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpLess64U(v *Value) bool { +func rewriteValueAMD64_OpDivMaskedFloat64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := 
v.Block - // match: (Less64U x y) - // result: (SETB (CMPQ x y)) + // match: (DivMaskedFloat64x4 x y mask) + // result: (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VDIVPDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpLess8(v *Value) bool { +func rewriteValueAMD64_OpDivMaskedFloat64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Less8 x y) - // result: (SETL (CMPB x y)) + // match: (DivMaskedFloat64x8 x y mask) + // result: (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64SETL) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VDIVPDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpLess8U(v *Value) bool { +func rewriteValueAMD64_OpDotProdBroadcastFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Less8U x y) - // result: (SETB (CMPB x y)) + // match: (DotProdBroadcastFloat64x2 x y) + // result: (VDPPD128 [127] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64SETB) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VDPPD128) + v.AuxInt = int8ToAuxInt(127) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpLessEqualFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpEq16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [2] x y)) + // match: (Eq16 x y) + // result: (SETEQ (CMPW x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpEq32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessEqualFloat32x4 x y) - // result: (VCMPPS128 [2] x y) + b := v.Block + // match: (Eq32 x y) + // result: (SETEQ (CMPL x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = int8ToAuxInt(2) - v.AddArg2(x, y) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpEq32F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessEqualFloat32x8 x y) - // result: (VCMPPS256 [2] x y) + b := v.Block + // match: (Eq32F x y) + // result: (SETEQF (UCOMISS x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = int8ToAuxInt(2) - v.AddArg2(x, y) + v.reset(OpAMD64SETEQF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpEq64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessEqualFloat64x2 x y) - // result: (VCMPPD128 [2] x y) + b := v.Block + // match: (Eq64 x y) + // result: 
(SETEQ (CMPQ x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = int8ToAuxInt(2) - v.AddArg2(x, y) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpEq64F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessEqualFloat64x4 x y) - // result: (VCMPPD256 [2] x y) + b := v.Block + // match: (Eq64F x y) + // result: (SETEQF (UCOMISD x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = int8ToAuxInt(2) - v.AddArg2(x, y) + v.reset(OpAMD64SETEQF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpEq8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [2] x y)) + // match: (Eq8 x y) + // result: (SETEQ (CMPB x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualInt16x16(v *Value) bool { +func rewriteValueAMD64_OpEqB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [2] x y)) + // match: (EqB x y) + // result: (SETEQ (CMPB x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpEqPtr(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) + // match: (EqPtr x y) + // result: (SETEQ (CMPQ x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualInt16x8(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [2] x y)) + // match: (EqualFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [0] x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) + // match: 
(EqualFloat32x4 x y) + // result: (VCMPPS128 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VCMPPS128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpLessEqualInt32x4(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [2] x y)) + // match: (EqualFloat32x8 x y) + // result: (VCMPPS256 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VCMPPS256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpLessEqualInt32x8(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [2] x y)) + // match: (EqualFloat64x2 x y) + // result: (VCMPPD128 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VCMPPD128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpLessEqualInt64x2(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [2] x y)) + // match: (EqualFloat64x4 x y) + // result: (VCMPPD256 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VCMPPD256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpLessEqualInt64x4(v *Value) bool { +func rewriteValueAMD64_OpEqualFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [2] x y)) + // match: (EqualFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [0] x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) + // match: (EqualInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [0] x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func 
rewriteValueAMD64_OpLessEqualInt8x16(v *Value) bool { +func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [2] x y)) + // match: (EqualInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [0] x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualInt8x32(v *Value) bool { +func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [2] x y)) + // match: (EqualInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [0] x y)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [2] x y)) + // match: (EqualInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec8x64) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint16x16(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedFloat32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [2] x y)) + // match: (EqualMaskedFloat32x16 x y mask) + // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedFloat32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) + // match: (EqualMaskedFloat32x4 x y mask) + // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint16x8(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedFloat32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [2] x y)) + // match: (EqualMaskedFloat32x8 x y mask) + // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedFloat64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) + // match: (EqualMaskedFloat64x2 x y mask) + // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint32x4(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedFloat64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [2] x y)) + // match: (EqualMaskedFloat64x4 x y mask) + // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint32x8(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedFloat64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [2] x y)) + // match: (EqualMaskedFloat64x8 x y mask) + // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + 
v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint64x2(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) + // match: (EqualMaskedInt16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint64x4(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) + // match: (EqualMaskedInt16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) + // match: (EqualMaskedInt16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint8x16(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [2] x y)) + // match: (EqualMaskedInt32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func 
rewriteValueAMD64_OpLessEqualUint8x32(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [2] x y)) + // match: (EqualMaskedInt32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessEqualUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [2] x y)) + // match: (EqualMaskedInt32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) + // match: (EqualMaskedInt64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessFloat32x4 x y) - // result: (VCMPPS128 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualMaskedInt64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = int8ToAuxInt(1) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: 
(LessFloat32x8 x y) - // result: (VCMPPS256 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualMaskedInt64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = int8ToAuxInt(1) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessFloat64x2 x y) - // result: (VCMPPD128 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualMaskedInt8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = int8ToAuxInt(1) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (LessFloat64x4 x y) - // result: (VCMPPD256 [1] x y) + b := v.Block + typ := &b.Func.Config.Types + // match: (EqualMaskedInt8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = int8ToAuxInt(1) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedInt8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [1] x y)) + // match: (EqualMaskedInt8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt16x16(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [1] x y)) + // match: (EqualMaskedUint16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, 
typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt16x32(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [1] x y)) + // match: (EqualMaskedUint16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt16x8(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [1] x y)) + // match: (EqualMaskedUint16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt32x16(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) + // match: (EqualMaskedUint32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt32x4(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [1] x y)) + // match: (EqualMaskedUint32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, 
v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt32x8(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [1] x y)) + // match: (EqualMaskedUint32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt64x2(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [1] x y)) + // match: (EqualMaskedUint64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt64x4(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [1] x y)) + // match: (EqualMaskedUint64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt64x8(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) + // match: (EqualMaskedUint64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt8x16(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt8x16 x y) 
- // result: (VPMOVMToVec8x16 (VPCMPB128 [1] x y)) + // match: (EqualMaskedUint8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt8x32(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [1] x y)) + // match: (EqualMaskedUint8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessInt8x64(v *Value) bool { +func rewriteValueAMD64_OpEqualMaskedUint8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [1] x y)) + // match: (EqualMaskedUint8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 + mask := v_2 v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v0.AddArg2(x, y) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint16x16(v *Value) bool { +func rewriteValueAMD64_OpEqualUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [1] x y)) + // match: (EqualUint16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPUW256 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec16x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { +func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) + // match: (EqualUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec16x32) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint16x8(v *Value) bool { +func rewriteValueAMD64_OpEqualUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := 
&b.Func.Config.Types - // match: (LessUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [1] x y)) + // match: (EqualUint16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPUW128 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec16x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint32x16(v *Value) bool { +func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) + // match: (EqualUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec32x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint32x4(v *Value) bool { +func rewriteValueAMD64_OpEqualUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [1] x y)) + // match: (EqualUint32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPUD128 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec32x4) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint32x8(v *Value) bool { +func rewriteValueAMD64_OpEqualUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [1] x y)) + // match: (EqualUint32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPUD256 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec32x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint64x2(v *Value) bool { +func rewriteValueAMD64_OpEqualUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y)) + // match: (EqualUint64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec64x2) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint64x4(v *Value) bool { +func rewriteValueAMD64_OpEqualUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y)) + // match: (EqualUint64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec64x4) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint64x8(v *Value) bool { +func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) + // match: 
(EqualUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec64x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint8x16(v *Value) bool { +func rewriteValueAMD64_OpEqualUint8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [1] x y)) + // match: (EqualUint8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPUB128 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec8x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint8x32(v *Value) bool { +func rewriteValueAMD64_OpEqualUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [1] x y)) + // match: (EqualUint8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPUB256 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec8x32) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLessUint8x64(v *Value) bool { +func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (LessUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [1] x y)) + // match: (EqualUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [0] x y)) for { x := v_0 y := v_1 v.reset(OpAMD64VPMOVMToVec8x64) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) + v0.AuxInt = int8ToAuxInt(0) v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpLoad(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (Load ptr mem) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (MOVQload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is64BitInt(t) || isPtr(t)) { - break - } - v.reset(OpAMD64MOVQload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: is32BitInt(t) - // result: (MOVLload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is32BitInt(t)) { - break - } - v.reset(OpAMD64MOVLload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: is16BitInt(t) - // result: (MOVWload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is16BitInt(t)) { - break - } - v.reset(OpAMD64MOVWload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: (t.IsBoolean() || is8BitInt(t)) - // result: (MOVBload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(t.IsBoolean() || is8BitInt(t)) { - break - } - v.reset(OpAMD64MOVBload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: is32BitFloat(t) - // result: (MOVSSload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is32BitFloat(t)) { - break - } - v.reset(OpAMD64MOVSSload) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: is64BitFloat(t) - // result: (MOVSDload ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(is64BitFloat(t)) { - break - } - v.reset(OpAMD64MOVSDload) - v.AddArg2(ptr, mem) - 
return true - } - // match: (Load ptr mem) - // cond: t.Size() == 16 - // result: (VMOVDQUload128 ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(t.Size() == 16) { - break - } - v.reset(OpAMD64VMOVDQUload128) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: t.Size() == 32 - // result: (VMOVDQUload256 ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(t.Size() == 32) { - break - } - v.reset(OpAMD64VMOVDQUload256) - v.AddArg2(ptr, mem) - return true - } - // match: (Load ptr mem) - // cond: t.Size() == 64 - // result: (VMOVDQUload512 ptr mem) - for { - t := v.Type - ptr := v_0 - mem := v_1 - if !(t.Size() == 64) { - break - } - v.reset(OpAMD64VMOVDQUload512) - v.AddArg2(ptr, mem) - return true - } - return false -} -func rewriteValueAMD64_OpLocalAddr(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LocalAddr {sym} base mem) - // cond: t.Elem().HasPointers() - // result: (LEAQ {sym} (SPanchored base mem)) - for { - t := v.Type - sym := auxToSym(v.Aux) - base := v_0 - mem := v_1 - if !(t.Elem().HasPointers()) { - break - } - v.reset(OpAMD64LEAQ) - v.Aux = symToAux(sym) - v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr) - v0.AddArg2(base, mem) - v.AddArg(v0) - return true - } - // match: (LocalAddr {sym} base _) - // cond: !t.Elem().HasPointers() - // result: (LEAQ {sym} base) - for { - t := v.Type - sym := auxToSym(v.Aux) - base := v_0 - if !(!t.Elem().HasPointers()) { - break - } - v.reset(OpAMD64LEAQ) - v.Aux = symToAux(sym) - v.AddArg(base) - return true - } - return false -} -func rewriteValueAMD64_OpLsh16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Lsh16x16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) - for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh16x16 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) - return true - } - return false -} -func rewriteValueAMD64_OpLsh16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Lsh16x32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) - for { - t := v.Type - x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh16x32 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) - for { - x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) - return true - } - return false -} -func rewriteValueAMD64_OpLsh16x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Lsh16x64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) - for { - t := v.Type 
- x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) - return true - } - // match: (Lsh16x64 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +func rewriteValueAMD64_OpFMA(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (FMA x y z) + // result: (VFMADD231SD z x y) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + z := v_2 + v.reset(OpAMD64VFMADD231SD) + v.AddArg3(z, x, y) return true } - return false } -func rewriteValueAMD64_OpLsh16x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpFloor(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Lsh16x8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + // match: (Floor x) + // result: (ROUNDSD [1] x) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + v.reset(OpAMD64ROUNDSD) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) return true } - // match: (Lsh16x8 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat32x4 x) + // result: (VROUNDPS128 [1] x) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) return true } - return false } -func rewriteValueAMD64_OpLsh32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Lsh32x16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) + // match: (FloorFloat32x8 x) + // result: (VROUNDPS256 [1] x) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) return true } - // match: (Lsh32x16 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorFloat64x2 x) + // result: (VROUNDPD128 [1] x) for { x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) return true } - return false } -func rewriteValueAMD64_OpLsh32x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Lsh32x32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) + // match: (FloorFloat64x4 x) + 
// result: (VROUNDPD256 [1] x) for { - t := v.Type x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) return true } - // match: (Lsh32x32 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } - return false } -func rewriteValueAMD64_OpLsh32x64(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpFloorWithPrecisionFloat32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Lsh32x64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) + // match: (FloorWithPrecisionFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+1] x) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } - // match: (Lsh32x64 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } - return false } -func rewriteValueAMD64_OpLsh32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpFloorWithPrecisionFloat64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Lsh32x8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + // match: (FloorWithPrecisionFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+1] x) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } - // match: (Lsh32x8 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (FloorWithPrecisionFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+1] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if 
!(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } - return false } -func rewriteValueAMD64_OpLsh64x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (Lsh64x16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPWconst y [64]))) + // match: (FloorWithPrecisionFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+1] x) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = int8ToAuxInt(a + 1) + v.AddArg(x) return true } - // match: (Lsh64x16 x y) - // cond: shiftIsBounded(v) - // result: (SHLQ x y) +} +func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (FloorWithPrecisionMaskedFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLQ) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpLsh64x32(v *Value) bool { +func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Lsh64x32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPLconst y [64]))) + // match: (FloorWithPrecisionMaskedFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Lsh64x32 x y) - // cond: shiftIsBounded(v) - // result: (SHLQ x y) +} +func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (FloorWithPrecisionMaskedFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLQ) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpLsh64x64(v *Value) bool { +func 
rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Lsh64x64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPQconst y [64]))) + // match: (FloorWithPrecisionMaskedFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Lsh64x64 x y) - // cond: shiftIsBounded(v) - // result: (SHLQ x y) +} +func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (FloorWithPrecisionMaskedFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLQ) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpLsh64x8(v *Value) bool { +func rewriteValueAMD64_OpFloorWithPrecisionMaskedFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Lsh64x8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPBconst y [64]))) + // match: (FloorWithPrecisionMaskedFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 1) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Lsh64x8 x y) - // cond: shiftIsBounded(v) - // result: (SHLQ x y) +} +func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (FusedMultiplyAddMaskedFloat32x16 x y z mask) + // result: (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLQ) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADD213PSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpLsh8x16(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := 
v.Args[0] b := v.Block - // match: (Lsh8x16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) + // match: (FusedMultiplyAddMaskedFloat32x4 x y z mask) + // result: (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADD213PSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Lsh8x16 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (FusedMultiplyAddMaskedFloat32x8 x y z mask) + // result: (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADD213PSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpLsh8x32(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Lsh8x32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) + // match: (FusedMultiplyAddMaskedFloat64x2 x y z mask) + // result: (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADD213PDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Lsh8x32 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (FusedMultiplyAddMaskedFloat64x4 x y z mask) + // result: (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADD213PDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpLsh8x64(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplyAddMaskedFloat64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Lsh8x64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) + // match: 
(FusedMultiplyAddMaskedFloat64x8 x y z mask) + // result: (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADD213PDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Lsh8x64 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (FusedMultiplyAddSubMaskedFloat32x16 x y z mask) + // result: (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADDSUB213PSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpLsh8x8(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Lsh8x8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) + // match: (FusedMultiplyAddSubMaskedFloat32x4 x y z mask) + // result: (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADDSUB213PSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Lsh8x8 x y) - // cond: shiftIsBounded(v) - // result: (SHLL x y) +} +func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (FusedMultiplyAddSubMaskedFloat32x8 x y z mask) + // result: (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHLL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADDSUB213PSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpMaskedAbsoluteInt16x16(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt16x16 x mask) - // result: (VPABSWMasked256 x (VPMOVVec16x16ToM mask)) + // match: (FusedMultiplyAddSubMaskedFloat64x2 x y z mask) + // result: (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) for { x := v_0 - mask := v_1 - 
v.reset(OpAMD64VPABSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADDSUB213PDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt16x32(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt16x32 x mask) - // result: (VPABSWMasked512 x (VPMOVVec16x32ToM mask)) + // match: (FusedMultiplyAddSubMaskedFloat64x4 x y z mask) + // result: (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADDSUB213PDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt16x8(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplyAddSubMaskedFloat64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt16x8 x mask) - // result: (VPABSWMasked128 x (VPMOVVec16x8ToM mask)) + // match: (FusedMultiplyAddSubMaskedFloat64x8 x y z mask) + // result: (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMADDSUB213PDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt32x16(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt32x16 x mask) - // result: (VPABSDMasked512 x (VPMOVVec32x16ToM mask)) + // match: (FusedMultiplySubAddMaskedFloat32x16 x y z mask) + // result: (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked512) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMSUBADD213PSMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt32x4(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt32x4 x mask) - // result: (VPABSDMasked128 x (VPMOVVec32x4ToM mask)) + // match: (FusedMultiplySubAddMaskedFloat32x4 x y z mask) + // result: (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked128) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMSUBADD213PSMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt32x8(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat32x8(v *Value) 
bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt32x8 x mask) - // result: (VPABSDMasked256 x (VPMOVVec32x8ToM mask)) + // match: (FusedMultiplySubAddMaskedFloat32x8 x y z mask) + // result: (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSDMasked256) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMSUBADD213PSMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt64x2(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt64x2 x mask) - // result: (VPABSQMasked128 x (VPMOVVec64x2ToM mask)) + // match: (FusedMultiplySubAddMaskedFloat64x2 x y z mask) + // result: (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked128) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMSUBADD213PDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt64x4(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt64x4 x mask) - // result: (VPABSQMasked256 x (VPMOVVec64x4ToM mask)) + // match: (FusedMultiplySubAddMaskedFloat64x4 x y z mask) + // result: (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked256) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMSUBADD213PDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt64x8(v *Value) bool { +func rewriteValueAMD64_OpFusedMultiplySubAddMaskedFloat64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt64x8 x mask) - // result: (VPABSQMasked512 x (VPMOVVec64x8ToM mask)) + // match: (FusedMultiplySubAddMaskedFloat64x8 x y z mask) + // result: (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSQMasked512) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VFMSUBADD213PDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt8x16(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedMaskedUint8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt8x16 x mask) - // result: (VPABSBMasked128 x (VPMOVVec8x16ToM mask)) + // match: (GaloisFieldAffineTransformInversedMaskedUint8x16 [a] x y mask) + // result: (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked128) + y := v_1 + mask := v_2 + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128) + v.AuxInt = int8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) 
v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt8x32(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedMaskedUint8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt8x32 x mask) - // result: (VPABSBMasked256 x (VPMOVVec8x32ToM mask)) + // match: (GaloisFieldAffineTransformInversedMaskedUint8x32 [a] x y mask) + // result: (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked256) + y := v_1 + mask := v_2 + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256) + v.AuxInt = int8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedAbsoluteInt8x64(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedMaskedUint8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAbsoluteInt8x64 x mask) - // result: (VPABSBMasked512 x (VPMOVVec8x64ToM mask)) + // match: (GaloisFieldAffineTransformInversedMaskedUint8x64 [a] x y mask) + // result: (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPABSBMasked512) + y := v_1 + mask := v_2 + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512) + v.AuxInt = int8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GaloisFieldAffineTransformInversedUint8x16 [a] x y) + // result: (VGF2P8AFFINEINVQB128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VGF2P8AFFINEINVQB128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GaloisFieldAffineTransformInversedUint8x32 [a] x y) + // result: (VGF2P8AFFINEINVQB256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VGF2P8AFFINEINVQB256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGaloisFieldAffineTransformInversedUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GaloisFieldAffineTransformInversedUint8x64 [a] x y) + // result: (VGF2P8AFFINEINVQB512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VGF2P8AFFINEINVQB512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedAddFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddFloat32x16 x y mask) - // result: (VADDPSMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask) + // result: (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v.reset(OpAMD64VGF2P8AFFINEQBMasked128) + v.AuxInt = 
int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedAddFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddFloat32x4 x y mask) - // result: (VADDPSMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask) + // result: (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VGF2P8AFFINEQBMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedAddFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddFloat32x8 x y mask) - // result: (VADDPSMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask) + // result: (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VGF2P8AFFINEQBMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedAddFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GaloisFieldAffineTransformUint8x16 [a] x y) + // result: (VGF2P8AFFINEQB128 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VGF2P8AFFINEQB128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GaloisFieldAffineTransformUint8x32 [a] x y) + // result: (VGF2P8AFFINEQB256 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VGF2P8AFFINEQB256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (GaloisFieldAffineTransformUint8x64 [a] x y) + // result: (VGF2P8AFFINEQB512 [a] x y) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + v.reset(OpAMD64VGF2P8AFFINEQB512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddFloat64x2 x y mask) - // result: (VADDPDMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (GaloisFieldMulMaskedUint8x16 x y mask) + // result: (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VGF2P8MULBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) 
v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedAddFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddFloat64x4 x y mask) - // result: (VADDPDMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (GaloisFieldMulMaskedUint8x32 x y mask) + // result: (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VGF2P8MULBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedAddFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddFloat64x8 x y mask) - // result: (VADDPDMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (GaloisFieldMulMaskedUint8x64 x y mask) + // result: (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VADDPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VGF2P8MULBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedAddInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGet128Float32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Float32x8 [a] x) + // result: (VEXTRACTF128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Float64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Float64x4 [a] x) + // result: (VEXTRACTF128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Int16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Int16x16 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Int32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Int32x8 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Int64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Int64x4 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Int8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Int8x32 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Uint16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Uint16x16 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) 
+ v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Uint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Uint32x8 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Uint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (Get128Uint64x4 [a] x) + // result: (VEXTRACTI128128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGet128Uint8x32(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt16x16 x y mask) - // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (Get128Uint8x32 [a] x) + // result: (VEXTRACTI128128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt16x32 x y mask) - // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (GetElemInt16x8 [a] x) + // result: (VPEXTRW128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPEXTRW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetElemInt32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt16x8 x y mask) - // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (GetElemInt32x4 [a] x) + // result: (VPEXTRD128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPEXTRD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetElemInt64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt32x16 x y mask) - // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (GetElemInt64x2 [a] x) + // result: (VPEXTRQ128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPEXTRQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetElemInt8x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt32x4 x y mask) - // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (GetElemInt8x16 [a] x) + // result: (VPEXTRB128 [a] x) for { + a := 
auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPEXTRB128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetElemUint16x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt32x8 x y mask) - // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (GetElemUint16x8 [a] x) + // result: (VPEXTRW128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPEXTRW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetElemUint32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt64x2 x y mask) - // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (GetElemUint32x4 [a] x) + // result: (VPEXTRD128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPEXTRD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetElemUint64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt64x4 x y mask) - // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (GetElemUint64x2 [a] x) + // result: (VPEXTRQ128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPEXTRQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetElemUint8x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt64x8 x y mask) - // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (GetElemUint8x16 [a] x) + // result: (VPEXTRB128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPEXTRB128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedAddInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpGetG(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt8x16 x y mask) - // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (GetG mem) + // cond: v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal + // result: (LoweredGetG mem) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + mem := v_0 + if !(v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal) { + break + 
} + v.reset(OpAMD64LoweredGetG) + v.AddArg(mem) return true } + return false } -func rewriteValueAMD64_OpMaskedAddInt8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddInt8x32 x y mask) - // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddInt8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddInt8x64 x y mask) - // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (GreaterEqualFloat32x4 x y) + // result: (VCMPPS128 [13] x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VCMPPS128) + v.AuxInt = int8ToAuxInt(13) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedAddUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddUint16x16 x y mask) - // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (GreaterEqualFloat32x8 x y) + // result: (VCMPPS256 [13] x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VCMPPS256) + v.AuxInt = int8ToAuxInt(13) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedAddUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddUint16x32 x y mask) - // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (GreaterEqualFloat64x2 x y) + // result: (VCMPPD128 [13] x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VCMPPD128) + v.AuxInt = int8ToAuxInt(13) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedAddUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedAddUint16x8 x y mask) - // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (GreaterEqualFloat64x4 x y) + // result: (VCMPPD256 [13] x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VCMPPD256) + v.AuxInt = int8ToAuxInt(13) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedAddUint32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool { v_1 := 
v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint32x16 x y mask) - // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddUint32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint32x4 x y mask) - // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPW256 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddUint32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint32x8 x y mask) - // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddUint64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint64x2 x y mask) - // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPW128 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddUint64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint64x4 x y mask) - // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddUint64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint64x8 x y mask) - // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPD128 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint8x16 x y mask) - // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPD256 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint8x32 x y mask) - // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPQ128 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAddUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAddUint8x64 x y mask) - // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPQ256 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndInt32x16 x y mask) - // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [13] x y)) for 
{ x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndInt32x4 x y mask) - // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPB128 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndInt32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndInt32x8 x y mask) - // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPB256 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndInt64x2 x y mask) - // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPANDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndInt64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndInt64x4 x y mask) - // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedFloat32x16 x y mask) + // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func 
rewriteValueAMD64_OpMaskedAndInt64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndInt64x8 x y mask) - // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedFloat32x4 x y mask) + // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotInt32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotInt32x16 x y mask) - // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedFloat32x8 x y mask) + // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotInt32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotInt32x4 x y mask) - // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedFloat64x2 x y mask) + // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotInt32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotInt32x8 x y mask) - // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedFloat64x4 x y mask) + // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + 
v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotInt64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotInt64x2 x y mask) - // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedFloat64x8 x y mask) + // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotInt64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotInt64x4 x y mask) - // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotInt64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotInt64x8 x y mask) - // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotUint32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotUint32x16 x y mask) - // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) + v0.AuxInt = 
int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotUint32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotUint32x4 x y mask) - // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotUint32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotUint32x8 x y mask) - // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotUint64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotUint64x2 x y mask) - // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotUint64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotUint64x4 x y mask) - // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndNotUint64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndNotUint64x8 x y mask) - // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDNQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndUint32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndUint32x16 x y mask) - // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndUint32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndUint32x4 x y mask) - // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndUint32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndUint32x8 x y mask) - // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + 
v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndUint64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndUint64x2 x y mask) - // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedInt8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndUint64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndUint64x4 x y mask) - // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAndUint64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAndUint64x8 x y mask) - // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPANDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalFloat32x16 x mask) - // result: (VRCP14PSMasked512 x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked512) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalFloat32x4 x mask) - // result: (VRCP14PSMasked128 x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalFloat32x8 x mask) - // result: (VRCP14PSMasked256 x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalFloat64x2 x mask) - // result: (VRCP14PDMasked128 x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalFloat64x4 x mask) - // result: 
(VRCP14PDMasked256 x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalFloat64x8 x mask) - // result: (VRCP14PDMasked512 x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalOfSqrtFloat32x16 x mask) - // result: (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalOfSqrtFloat32x4 x mask) - // result: (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) 
return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalOfSqrtFloat32x8 x mask) - // result: (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalOfSqrtFloat64x2 x mask) - // result: (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualMaskedUint8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalOfSqrtFloat64x4 x mask) - // result: (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPUW256 [13] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedApproximateReciprocalOfSqrtFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedApproximateReciprocalOfSqrtFloat64x8 x mask) - // result: (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } 
} -func rewriteValueAMD64_OpMaskedAverageUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAverageUint16x16 x y mask) - // result: (VPAVGWMasked256 x y (VPMOVVec16x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPUW128 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAverageUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAverageUint16x32 x y mask) - // result: (VPAVGWMasked512 x y (VPMOVVec16x32ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAverageUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAverageUint16x8 x y mask) - // result: (VPAVGWMasked128 x y (VPMOVVec16x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPUD128 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAverageUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAverageUint8x16 x y mask) - // result: (VPAVGBMasked128 x y (VPMOVVec8x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPUD256 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAverageUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAverageUint8x32 x y mask) - // result: (VPAVGBMasked256 x y (VPMOVVec8x32ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - 
v.reset(OpAMD64VPAVGBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedAverageUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterEqualUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedAverageUint8x64 x y mask) - // result: (VPAVGBMasked512 x y (VPMOVVec8x64ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPAVGBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedCeilWithPrecisionFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualUint8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedCeilWithPrecisionFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPUB128 [13] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedCeilWithPrecisionFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPUB256 [13] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, 
typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedCeilWithPrecisionFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterEqualUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [13] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(13) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedCeilWithPrecisionFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [14] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedCeilWithPrecisionFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) + // match: (GreaterFloat32x4 x y) + // result: (VCMPPS128 [14] x y) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = int8ToAuxInt(14) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedDiffWithCeilWithPrecisionFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM mask)) + // match: (GreaterFloat32x8 x y) + // result: (VCMPPS256 [14] x y) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = int8ToAuxInt(14) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedDiffWithCeilWithPrecisionFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM 
mask)) + // match: (GreaterFloat64x2 x y) + // result: (VCMPPD128 [14] x y) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = int8ToAuxInt(14) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedDiffWithCeilWithPrecisionFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM mask)) + // match: (GreaterFloat64x4 x y) + // result: (VCMPPD256 [14] x y) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = int8ToAuxInt(14) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithCeilWithPrecisionFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [14] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithCeilWithPrecisionFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [14] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithCeilWithPrecisionFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [14] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = int8ToAuxInt(a + 2) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + 
v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithFloorWithPrecisionFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterInt64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPQ128 [14] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithFloorWithPrecisionFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [14] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithFloorWithPrecisionFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [14] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedFloat32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithFloorWithPrecisionFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedFloat32x16 x y mask) + // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := 
b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedFloat32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithFloorWithPrecisionFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedFloat32x4 x y mask) + // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedFloat32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithFloorWithPrecisionFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedFloat32x8 x y mask) + // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedFloat64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithRoundWithPrecisionFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedFloat64x2 x y mask) + // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedFloat64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: 
(MaskedDiffWithRoundWithPrecisionFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedFloat64x4 x y mask) + // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedFloat64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithRoundWithPrecisionFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedFloat64x8 x y mask) + // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithRoundWithPrecisionFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithRoundWithPrecisionFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithRoundWithPrecisionFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithTruncWithPrecisionFloat32x16 [a] x mask) - // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithTruncWithPrecisionFloat32x4 [a] x mask) - // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked128) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x8(v *Value) bool { +func 
rewriteValueAMD64_OpGreaterMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedDiffWithTruncWithPrecisionFloat32x8 [a] x mask) - // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) - for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked256) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM mask))) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithTruncWithPrecisionFloat64x2 [a] x mask) - // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked128) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithTruncWithPrecisionFloat64x4 [a] x mask) - // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked256) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDiffWithTruncWithPrecisionFloat64x8 [a] x mask) - // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM mask))) for { - a := 
auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDivFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDivFloat32x16 x y mask) - // result: (VDIVPSMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VDIVPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDivFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDivFloat32x4 x y mask) - // result: (VDIVPSMasked128 x y (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VDIVPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDivFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedInt8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDivFloat32x8 x y mask) - // result: (VDIVPSMasked256 x y (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedInt8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VDIVPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDivFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDivFloat64x2 x y mask) - // result: (VDIVPDMasked128 x y (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedUint16x16 x y mask) + // 
result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VDIVPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDivFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDivFloat64x4 x y mask) - // result: (VDIVPDMasked256 x y (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedUint16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VDIVPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedDivFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedDivFloat64x8 x y mask) - // result: (VDIVPDMasked512 x y (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (GreaterMaskedUint16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VDIVPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM mask))) + // match: (GreaterMaskedUint32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -36539,21 +36413,21 @@ func rewriteValueAMD64_OpMaskedEqualFloat32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 
(VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) + // match: (GreaterMaskedUint32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -36561,21 +36435,21 @@ func rewriteValueAMD64_OpMaskedEqualFloat32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) + // match: (GreaterMaskedUint32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -36583,21 +36457,21 @@ func rewriteValueAMD64_OpMaskedEqualFloat32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM mask))) + // match: (GreaterMaskedUint64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -36605,21 +36479,21 @@ func rewriteValueAMD64_OpMaskedEqualFloat64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM mask))) + // match: (GreaterMaskedUint64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -36627,21 +36501,21 @@ func rewriteValueAMD64_OpMaskedEqualFloat64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint64x8(v *Value) 
bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM mask))) + // match: (GreaterMaskedUint64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -36649,351 +36523,428 @@ func rewriteValueAMD64_OpMaskedEqualFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualInt16x16(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM mask))) + // match: (GreaterMaskedUint8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM mask))) + // match: (GreaterMaskedUint8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt16x8(v *Value) bool { +func rewriteValueAMD64_OpGreaterMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM mask))) + // match: (GreaterMaskedUint8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) + v0.AuxInt = 
int8ToAuxInt(14) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM mask))) + // match: (GreaterUint16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPUW256 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterUint16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPUW128 [14] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (GreaterUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [14] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM mask))) + // match: (GreaterUint32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPUD128 [14] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM mask))) + // match: (GreaterUint32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPUD256 [14] x y)) 
for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM mask))) + // match: (GreaterUint64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPUQ128 [14] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM mask))) + // match: (GreaterUint64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPUQ256 [14] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM mask))) + // match: (GreaterUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM mask))) + // match: (GreaterUint8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPUB128 [14] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - 
v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM mask))) + // match: (GreaterUint8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPUB256 [14] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualInt8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM mask))) + // match: (GreaterUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [14] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(14) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (HasCPUFeature {s}) + // result: (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s}))) + for { + s := auxToSym(v.Aux) + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v0.AuxInt = int32ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64) + v1.Aux = symToAux(s) + v0.AddArg(v1) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpIsInBounds(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedEqualUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM mask))) + // match: (IsInBounds idx len) + // result: (SETB (CMPQ idx len)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + idx := v_0 + len := v_1 + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(idx, len) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM mask))) + // match: (IsNanFloat32x16 x y) + // result: 
(VPMOVMToVec32x16 (VCMPPS512 [3] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = int8ToAuxInt(3) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (IsNanFloat32x4 x y) + // result: (VCMPPS128 [3] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = int8ToAuxInt(3) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (IsNanFloat32x8 x y) + // result: (VCMPPS256 [3] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = int8ToAuxInt(3) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (IsNanFloat64x2 x y) + // result: (VCMPPD128 [3] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = int8ToAuxInt(3) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (IsNanFloat64x4 x y) + // result: (VCMPPD256 [3] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = int8ToAuxInt(3) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM mask))) + // match: (IsNanFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(3) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpIsNanMaskedFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM mask))) + // match: (IsNanMaskedFloat32x16 x y mask) + // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(3) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37001,21 +36952,21 @@ func rewriteValueAMD64_OpMaskedEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualUint32x4(v *Value) bool { +func rewriteValueAMD64_OpIsNanMaskedFloat32x4(v *Value) bool 
{ v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM mask))) + // match: (IsNanMaskedFloat32x4 x y mask) + // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(3) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37023,21 +36974,21 @@ func rewriteValueAMD64_OpMaskedEqualUint32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualUint32x8(v *Value) bool { +func rewriteValueAMD64_OpIsNanMaskedFloat32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM mask))) + // match: (IsNanMaskedFloat32x8 x y mask) + // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(3) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37045,21 +36996,21 @@ func rewriteValueAMD64_OpMaskedEqualUint32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualUint64x2(v *Value) bool { +func rewriteValueAMD64_OpIsNanMaskedFloat64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM mask))) + // match: (IsNanMaskedFloat64x2 x y mask) + // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(3) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37067,21 +37018,21 @@ func rewriteValueAMD64_OpMaskedEqualUint64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualUint64x4(v *Value) bool { +func rewriteValueAMD64_OpIsNanMaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM mask))) + // match: (IsNanMaskedFloat64x4 x y mask) + // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(3) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37089,21 +37040,21 @@ func rewriteValueAMD64_OpMaskedEqualUint64x4(v *Value) bool { 
return true } } -func rewriteValueAMD64_OpMaskedEqualUint64x8(v *Value) bool { +func rewriteValueAMD64_OpIsNanMaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedEqualUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM mask))) + // match: (IsNanMaskedFloat64x8 x y mask) + // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(3) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37111,729 +37062,679 @@ func rewriteValueAMD64_OpMaskedEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedEqualUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpIsNonNil(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (IsNonNil p) + // result: (SETNE (TESTQ p p)) + for { + p := v_0 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64TESTQ, types.TypeFlags) + v0.AddArg2(p, p) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedEqualUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM mask))) + // match: (IsSliceInBounds idx len) + // result: (SETBE (CMPQ idx len)) + for { + idx := v_0 + len := v_1 + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(idx, len) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLeq16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Leq16 x y) + // result: (SETLE (CMPW x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLeq16U(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedEqualUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM mask))) + // match: (Leq16U x y) + // result: (SETBE (CMPW x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedEqualUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLeq32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedEqualUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM mask))) + // match: (Leq32 x y) + // result: 
(SETLE (CMPL x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpLeq32F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFloorWithPrecisionFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM mask)) + // match: (Leq32F x y) + // result: (SETGEF (UCOMISS y x)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64SETGEF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpLeq32U(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFloorWithPrecisionFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM mask)) + // match: (Leq32U x y) + // result: (SETBE (CMPL x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpLeq64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFloorWithPrecisionFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM mask)) + // match: (Leq64 x y) + // result: (SETLE (CMPQ x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpLeq64F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFloorWithPrecisionFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM mask)) + // match: (Leq64F x y) + // result: (SETGEF (UCOMISD y x)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64SETGEF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpLeq64U(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // 
match: (MaskedFloorWithPrecisionFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM mask)) + // match: (Leq64U x y) + // result: (SETBE (CMPQ x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpLeq8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFloorWithPrecisionFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM mask)) + // match: (Leq8 x y) + // result: (SETLE (CMPB x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = int8ToAuxInt(a + 1) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64SETLE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLeq8U(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddFloat32x16 x y z mask) - // result: (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (Leq8U x y) + // result: (SETBE (CMPB x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETBE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddFloat32x4 x y z mask) - // result: (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (Less16 x y) + // result: (SETL (CMPW x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETL) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess16U(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddFloat32x8 x y z mask) - // result: (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (Less16U x y) + // result: (SETB (CMPW x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] 
+func rewriteValueAMD64_OpLess32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddFloat64x2 x y z mask) - // result: (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) + // match: (Less32 x y) + // result: (SETL (CMPL x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETL) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess32F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddFloat64x4 x y z mask) - // result: (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) + // match: (Less32F x y) + // result: (SETGF (UCOMISS y x)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETGF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess32U(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddFloat64x8 x y z mask) - // result: (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) + // match: (Less32U x y) + // result: (SETB (CMPL x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADD213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddSubFloat32x16 x y z mask) - // result: (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (Less64 x y) + // result: (SETL (CMPQ x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETL) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess64F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddSubFloat32x4 x y z mask) - // result: (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (Less64F x y) + // result: (SETGF (UCOMISD y x)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETGF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) + v0.AddArg2(y, x) + v.AddArg(v0) return true } } -func 
rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess64U(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddSubFloat32x8 x y z mask) - // result: (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (Less64U x y) + // result: (SETB (CMPQ x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddSubFloat64x2 x y z mask) - // result: (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM mask)) + // match: (Less8 x y) + // result: (SETL (CMPB x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETL) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLess8U(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddSubFloat64x4 x y z mask) - // result: (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM mask)) + // match: (Less8U x y) + // result: (SETB (CMPB x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETB) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplyAddSubFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplyAddSubFloat64x8 x y z mask) - // result: (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [2] x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMADDSUB213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedFusedMultiplySubAddFloat32x16 x y z mask) - // result: (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (LessEqualFloat32x4 x y) + // result: (VCMPPS128 [2] x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked512) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VCMPPS128) + v.AuxInt = int8ToAuxInt(2) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedFusedMultiplySubAddFloat32x4 x y z mask) - // result: (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (LessEqualFloat32x8 x y) + // result: (VCMPPS256 [2] x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VCMPPS256) + v.AuxInt = int8ToAuxInt(2) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessEqualFloat64x2 x y) + // result: (VCMPPD128 [2] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = int8ToAuxInt(2) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessEqualFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (LessEqualFloat64x4 x y) + // result: (VCMPPD256 [2] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = int8ToAuxInt(2) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplySubAddFloat32x8 x y z mask) - // result: (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [2] x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplySubAddFloat64x2 x y z mask) - // result: (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPW256 [2] x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplySubAddFloat64x4 x y z mask) - // result: (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM mask)) + typ := 
&b.Func.Config.Types + // match: (LessEqualInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedFusedMultiplySubAddFloat64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedFusedMultiplySubAddFloat64x8 x y z mask) - // result: (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPW128 [2] x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VFMSUBADD213PDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformInversedUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldAffineTransformInversedUint8x16 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformInversedUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldAffineTransformInversedUint8x32 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPD128 [2] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformInversedUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldAffineTransformInversedUint8x64 [a] x y mask) - // result: (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt32x8 x y) 
+ // result: (VPMOVMToVec32x8 (VPCMPD256 [2] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldAffineTransformUint8x16 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPQ128 [2] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldAffineTransformUint8x32 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPQ256 [2] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldAffineTransformUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldAffineTransformUint8x64 [a] x y mask) - // result: (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8AFFINEQBMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldMulUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldMulUint8x16 x y mask) - // result: (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPB128 [2] x y)) for { x := v_0 y := 
v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldMulUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldMulUint8x32 x y mask) - // result: (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPB256 [2] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGaloisFieldMulUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedGaloisFieldMulUint8x64 x y mask) - // result: (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) + typ := &b.Func.Config.Types + // match: (LessEqualInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [2] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VGF2P8MULBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM mask))) + // match: (LessEqualMaskedFloat32x16 x y mask) + // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37841,21 +37742,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualFloat32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) + // match: (LessEqualMaskedFloat32x4 x y mask) + // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) 
v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37863,21 +37764,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualFloat32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) + // match: (LessEqualMaskedFloat32x8 x y mask) + // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37885,21 +37786,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualFloat32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM mask))) + // match: (LessEqualMaskedFloat64x2 x y mask) + // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37907,21 +37808,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualFloat64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM mask))) + // match: (LessEqualMaskedFloat64x4 x y mask) + // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37929,21 +37830,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualFloat64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM mask))) + // match: (LessEqualMaskedFloat64x8 x y mask) + // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37951,21 +37852,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt16x16(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM mask))) + // match: (LessEqualMaskedInt16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37973,21 +37874,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt16x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM mask))) + // match: (LessEqualMaskedInt16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x32) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -37995,21 +37896,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt16x8(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM mask))) + // match: (LessEqualMaskedInt16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38017,21 +37918,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt16x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM mask))) + // match: (LessEqualMaskedInt32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38039,21 +37940,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt32x4(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM mask))) + // match: (LessEqualMaskedInt32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38061,21 +37962,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt32x8(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM mask))) + // match: (LessEqualMaskedInt32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38083,21 +37984,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt64x2(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM mask))) + // match: (LessEqualMaskedInt64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38105,21 +38006,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt64x4(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM mask))) + // match: (LessEqualMaskedInt64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38127,21 +38028,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM mask))) + // match: (LessEqualMaskedInt64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38149,21 +38050,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt8x16(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM mask))) + // match: (LessEqualMaskedInt8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38171,21 +38072,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt8x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt8x32(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM mask))) + // match: (LessEqualMaskedInt8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x32) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38193,21 +38094,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt8x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedInt8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM mask))) + // match: (LessEqualMaskedInt8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x64) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec8x64ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38215,21 +38116,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint16x16(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM mask))) + // match: (LessEqualMaskedUint16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38237,21 +38138,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint16x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM mask))) + // match: (LessEqualMaskedUint16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x32) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38259,21 +38160,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint16x8(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM mask))) + // match: (LessEqualMaskedUint16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38281,21 +38182,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint16x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM mask))) + // match: (LessEqualMaskedUint32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = 
int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38303,21 +38204,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint32x4(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM mask))) + // match: (LessEqualMaskedUint32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38325,21 +38226,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint32x8(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM mask))) + // match: (LessEqualMaskedUint32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38347,21 +38248,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint64x2(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM mask))) + // match: (LessEqualMaskedUint64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38369,21 +38270,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint64x4(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM mask))) + // match: (LessEqualMaskedUint64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + 
v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38391,21 +38292,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint64x8(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM mask))) + // match: (LessEqualMaskedUint64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38413,21 +38314,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint8x16(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM mask))) + // match: (LessEqualMaskedUint8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x16) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38435,21 +38336,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint8x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint8x32(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM mask))) + // match: (LessEqualMaskedUint8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x32) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38457,21 +38358,21 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint8x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterEqualUint8x64(v *Value) bool { +func rewriteValueAMD64_OpLessEqualMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterEqualUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM mask))) + // match: (LessEqualMaskedUint8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x64) v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = 
int8ToAuxInt(13) + v0.AuxInt = int8ToAuxInt(2) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38479,483 +38380,545 @@ func rewriteValueAMD64_OpMaskedGreaterEqualUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterFloat32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM mask))) + // match: (LessEqualUint16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPUW256 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPUW128 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterFloat32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM mask))) + // match: (LessEqualUint32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPUD128 [2] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterFloat32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessEqualUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterFloat32x8 x y mask) - // result: 
(VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM mask))) + // match: (LessEqualUint32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPUD256 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPUB128 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPUB256 [2] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (LessEqualUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [2] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(2) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterFloat64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM 
mask))) + // match: (LessFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterFloat64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedGreaterFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM mask))) + // match: (LessFloat32x4 x y) + // result: (VCMPPS128 [1] x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VCMPPS128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedGreaterFloat64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedGreaterFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM mask))) + // match: (LessFloat32x8 x y) + // result: (VCMPPS256 [1] x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VCMPPS256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedGreaterInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM mask))) + // match: (LessFloat64x2 x y) + // result: (VCMPPD128 [1] x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VCMPPD128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedGreaterInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM mask))) + // match: (LessFloat64x4 x y) + // result: (VCMPPD256 [1] x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) 
- v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VCMPPD256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM mask))) + // match: (LessFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM mask))) + // match: (LessInt16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPW256 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM mask))) + // match: (LessInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM mask))) + // match: (LessInt16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPW128 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + 
v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM mask))) + // match: (LessInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM mask))) + // match: (LessInt32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPD128 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM mask))) + // match: (LessInt32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPD256 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM mask))) + // match: (LessInt64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPQ128 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt8x32(v *Value) bool { - v_2 := 
v.Args[2] +func rewriteValueAMD64_OpLessInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM mask))) + // match: (LessInt64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPQ256 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterInt8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM mask))) + // match: (LessInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM mask))) + // match: (LessInt8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPB128 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM mask))) + // match: (LessInt8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPB256 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b 
:= v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM mask))) + // match: (LessInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterUint32x16(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM mask))) + // match: (LessMaskedFloat32x16 x y mask) + // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38963,21 +38926,21 @@ func rewriteValueAMD64_OpMaskedGreaterUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterUint32x4(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedFloat32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM mask))) + // match: (LessMaskedFloat32x4 x y mask) + // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -38985,21 +38948,21 @@ func rewriteValueAMD64_OpMaskedGreaterUint32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterUint32x8(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedFloat32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM mask))) + // match: (LessMaskedFloat32x8 x y mask) + // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39007,21 +38970,21 @@ func rewriteValueAMD64_OpMaskedGreaterUint32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterUint64x2(v *Value) bool { 
+func rewriteValueAMD64_OpLessMaskedFloat64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM mask))) + // match: (LessMaskedFloat64x2 x y mask) + // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39029,21 +38992,21 @@ func rewriteValueAMD64_OpMaskedGreaterUint64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterUint64x4(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM mask))) + // match: (LessMaskedFloat64x4 x y mask) + // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39051,21 +39014,21 @@ func rewriteValueAMD64_OpMaskedGreaterUint64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterUint64x8(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM mask))) + // match: (LessMaskedFloat64x8 x y mask) + // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39073,87 +39036,87 @@ func rewriteValueAMD64_OpMaskedGreaterUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedGreaterUint8x16(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM mask))) + // match: (LessMaskedInt16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + 
v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterUint8x32(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM mask))) + // match: (LessMaskedInt16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedGreaterUint8x64(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedGreaterUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM mask))) + // match: (LessMaskedInt16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedIsNanFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedIsNanFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM mask))) + // match: (LessMaskedInt32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39161,21 +39124,21 @@ func rewriteValueAMD64_OpMaskedIsNanFloat32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedIsNanFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedIsNanFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM mask))) + // match: (LessMaskedInt32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, 
OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39183,21 +39146,21 @@ func rewriteValueAMD64_OpMaskedIsNanFloat32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedIsNanFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedIsNanFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM mask))) + // match: (LessMaskedInt32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39205,21 +39168,21 @@ func rewriteValueAMD64_OpMaskedIsNanFloat32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedIsNanFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedIsNanFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM mask))) + // match: (LessMaskedInt64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39227,21 +39190,21 @@ func rewriteValueAMD64_OpMaskedIsNanFloat64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedIsNanFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedIsNanFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM mask))) + // match: (LessMaskedInt64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39249,21 +39212,21 @@ func rewriteValueAMD64_OpMaskedIsNanFloat64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedIsNanFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedIsNanFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM mask))) + // match: (LessMaskedInt64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x 
y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39271,153 +39234,87 @@ func rewriteValueAMD64_OpMaskedIsNanFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessEqualFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpMaskedLessEqualFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessEqualFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpMaskedLessEqualFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessEqualFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpMaskedLessEqualFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM mask))) + // match: (LessMaskedInt8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualFloat64x4 x y mask) - // 
result: (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM mask))) + // match: (LessMaskedInt8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedInt8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM mask))) + // match: (LessMaskedInt8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt16x16(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM mask))) + // match: (LessMaskedUint16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39425,21 +39322,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt16x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM mask))) + // match: (LessMaskedUint16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39447,21 +39344,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt16x32(v *Value) bool { return true } } -func 
rewriteValueAMD64_OpMaskedLessEqualInt16x8(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM mask))) + // match: (LessMaskedUint16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39469,21 +39366,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt16x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM mask))) + // match: (LessMaskedUint32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39491,21 +39388,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt32x4(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM mask))) + // match: (LessMaskedUint32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39513,21 +39410,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt32x8(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM mask))) + // match: (LessMaskedUint32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39535,21 +39432,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt64x2(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM mask))) + // match: (LessMaskedUint64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39557,21 +39454,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt64x4(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM mask))) + // match: (LessMaskedUint64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39579,21 +39476,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM mask))) + // match: (LessMaskedUint64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39601,21 +39498,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt8x16(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM mask))) + // match: (LessMaskedUint8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39623,21 +39520,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt8x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt8x32(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM mask))) + // match: (LessMaskedUint8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39645,21 +39542,21 @@ func rewriteValueAMD64_OpMaskedLessEqualInt8x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpLessMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM mask))) + // match: (LessMaskedUint8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask))) for { x := v_0 y := v_1 mask := v_2 v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v1.AddArg(mask) v0.AddArg3(x, y, v1) @@ -39667,936 +39564,1083 @@ func rewriteValueAMD64_OpMaskedLessEqualInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM mask))) + // match: (LessUint16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPUW256 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM mask))) + // match: (LessUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 
v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM mask))) + // match: (LessUint16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPUW128 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM mask))) + // match: (LessUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM mask))) + // match: (LessUint32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPUD128 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM mask))) + // match: (LessUint32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPUD256 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - 
v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM mask))) + // match: (LessUint64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM mask))) + // match: (LessUint64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM mask))) + // match: (LessUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM mask))) + // match: (LessUint8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPUB128 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint8x32(v *Value) bool { - 
v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM mask))) + // match: (LessUint8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPUB256 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessEqualUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLessUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessEqualUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM mask))) + // match: (LessUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [1] x y)) for { x := v_0 y := v_1 - mask := v_2 v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(2) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(1) + v0.AddArg2(x, y) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessFloat32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLoad(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (Load ptr mem) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (MOVQload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is64BitInt(t) || isPtr(t)) { + break + } + v.reset(OpAMD64MOVQload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: is32BitInt(t) + // result: (MOVLload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is32BitInt(t)) { + break + } + v.reset(OpAMD64MOVLload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: is16BitInt(t) + // result: (MOVWload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is16BitInt(t)) { + break + } + v.reset(OpAMD64MOVWload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: (t.IsBoolean() || is8BitInt(t)) + // result: (MOVBload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(t.IsBoolean() || is8BitInt(t)) { + break + } + v.reset(OpAMD64MOVBload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: is32BitFloat(t) + // result: (MOVSSload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is32BitFloat(t)) { + break + } + v.reset(OpAMD64MOVSSload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: is64BitFloat(t) + // result: (MOVSDload ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(is64BitFloat(t)) { + break + } + v.reset(OpAMD64MOVSDload) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: t.Size() == 16 + // result: (VMOVDQUload128 ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(t.Size() == 16) { + break + } + v.reset(OpAMD64VMOVDQUload128) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: t.Size() == 32 + // 
result: (VMOVDQUload256 ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(t.Size() == 32) { + break + } + v.reset(OpAMD64VMOVDQUload256) + v.AddArg2(ptr, mem) + return true + } + // match: (Load ptr mem) + // cond: t.Size() == 64 + // result: (VMOVDQUload512 ptr mem) + for { + t := v.Type + ptr := v_0 + mem := v_1 + if !(t.Size() == 64) { + break + } + v.reset(OpAMD64VMOVDQUload512) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpLocalAddr(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (MaskedLessFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM mask))) + // match: (LocalAddr {sym} base mem) + // cond: t.Elem().HasPointers() + // result: (LEAQ {sym} (SPanchored base mem)) + for { + t := v.Type + sym := auxToSym(v.Aux) + base := v_0 + mem := v_1 + if !(t.Elem().HasPointers()) { + break + } + v.reset(OpAMD64LEAQ) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr) + v0.AddArg2(base, mem) + v.AddArg(v0) + return true + } + // match: (LocalAddr {sym} base _) + // cond: !t.Elem().HasPointers() + // result: (LEAQ {sym} base) + for { + t := v.Type + sym := auxToSym(v.Aux) + base := v_0 + if !(!t.Elem().HasPointers()) { + break + } + v.reset(OpAMD64LEAQ) + v.Aux = symToAux(sym) + v.AddArg(base) + return true + } + return false +} +func rewriteValueAMD64_OpLsh16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Lsh16x16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh16x16 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessFloat32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM mask))) + // match: (Lsh16x32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + 
v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh16x32 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessFloat32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh16x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM mask))) + // match: (Lsh16x64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh16x64 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessFloat64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM mask))) + // match: (Lsh16x8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM mask))) + // match: (Lsh16x8 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessFloat64x8(v *Value) bool { - v_2 := v.Args[2] 
+func rewriteValueAMD64_OpLsh32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM mask))) + // match: (Lsh32x16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM mask))) + // match: (Lsh32x16 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessInt16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh32x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM mask))) + // match: (Lsh32x32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM mask))) + // match: (Lsh32x32 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if 
!(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh32x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM mask))) + // match: (Lsh32x64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM mask))) + // match: (Lsh32x64 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessInt32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM mask))) + // match: (Lsh32x8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM mask))) + // match: (Lsh32x8 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) 
- v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessInt64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh64x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM mask))) + // match: (Lsh64x16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPWconst y [64]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM mask))) + // match: (Lsh64x16 x y) + // cond: shiftIsBounded(v) + // result: (SHLQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessInt8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh64x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM mask))) + // match: (Lsh64x32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPLconst y [64]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM mask))) + // match: (Lsh64x32 x y) + // cond: shiftIsBounded(v) + 
// result: (SHLQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessInt8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh64x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM mask))) + // match: (Lsh64x64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPQconst y [64]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM mask))) + // match: (Lsh64x64 x y) + // cond: shiftIsBounded(v) + // result: (SHLQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM mask))) + // match: (Lsh64x8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHLQ x y) (SBBQcarrymask (CMPBconst y [64]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // 
match: (MaskedLessUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM mask))) + // match: (Lsh64x8 x y) + // cond: shiftIsBounded(v) + // result: (SHLQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessUint32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM mask))) + // match: (Lsh8x16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPWconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) + return true + } + // match: (Lsh8x16 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) + for { + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessUint32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM mask))) + // match: (Lsh8x32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPLconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM mask))) + // match: (Lsh8x32 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessUint64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM mask))) + // match: (Lsh8x64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPQconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM mask))) + // match: (Lsh8x64 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessUint64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpLsh8x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM mask))) + // match: (Lsh8x8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHLL x y) (SBBLcarrymask (CMPBconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedLessUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM mask))) + // match: (Lsh8x8 x y) + // cond: shiftIsBounded(v) + // result: (SHLL x y) for { x := 
v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHLL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedLessUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMax32F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM mask))) + // match: (Max32F x y) + // result: (Neg32F (Min32F (Neg32F x) (Neg32F y))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpNeg32F) + v.Type = t + v0 := b.NewValue0(v.Pos, OpMin32F, t) + v1 := b.NewValue0(v.Pos, OpNeg32F, t) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpNeg32F, t) + v2.AddArg(y) + v0.AddArg2(v1, v2) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedLessUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMax64F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedLessUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM mask))) + // match: (Max64F x y) + // result: (Neg64F (Min64F (Neg64F x) (Neg64F y))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(1) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) + v.reset(OpNeg64F) + v.Type = t + v0 := b.NewValue0(v.Pos, OpMin64F, t) + v1 := b.NewValue0(v.Pos, OpNeg64F, t) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpNeg64F, t) + v2.AddArg(y) + v0.AddArg2(v1, v2) v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMaxFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxFloat32x16 x y mask) + // match: (MaxMaskedFloat32x16 x y mask) // result: (VMAXPSMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 @@ -40609,12 +40653,12 @@ func rewriteValueAMD64_OpMaskedMaxFloat32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedFloat32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxFloat32x4 x y mask) + // match: (MaxMaskedFloat32x4 x y mask) // result: (VMAXPSMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 @@ -40627,12 +40671,12 @@ func rewriteValueAMD64_OpMaskedMaxFloat32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedFloat32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxFloat32x8 x y mask) + // match: (MaxMaskedFloat32x8 x y mask) // result: (VMAXPSMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 @@ -40645,12 +40689,12 @@ func 
rewriteValueAMD64_OpMaskedMaxFloat32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedFloat64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxFloat64x2 x y mask) + // match: (MaxMaskedFloat64x2 x y mask) // result: (VMAXPDMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 @@ -40663,12 +40707,12 @@ func rewriteValueAMD64_OpMaskedMaxFloat64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxFloat64x4 x y mask) + // match: (MaxMaskedFloat64x4 x y mask) // result: (VMAXPDMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 @@ -40681,12 +40725,12 @@ func rewriteValueAMD64_OpMaskedMaxFloat64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxFloat64x8 x y mask) + // match: (MaxMaskedFloat64x8 x y mask) // result: (VMAXPDMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 @@ -40699,12 +40743,12 @@ func rewriteValueAMD64_OpMaskedMaxFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt16x16(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt16x16 x y mask) + // match: (MaxMaskedInt16x16 x y mask) // result: (VPMAXSWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 @@ -40717,12 +40761,12 @@ func rewriteValueAMD64_OpMaskedMaxInt16x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt16x32(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt16x32 x y mask) + // match: (MaxMaskedInt16x32 x y mask) // result: (VPMAXSWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 @@ -40735,12 +40779,12 @@ func rewriteValueAMD64_OpMaskedMaxInt16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt16x8(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt16x8 x y mask) + // match: (MaxMaskedInt16x8 x y mask) // result: (VPMAXSWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 @@ -40753,12 +40797,12 @@ func rewriteValueAMD64_OpMaskedMaxInt16x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt32x16(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt32x16 x y mask) + // match: (MaxMaskedInt32x16 x y mask) // result: (VPMAXSDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 @@ -40771,12 +40815,12 @@ func rewriteValueAMD64_OpMaskedMaxInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt32x4(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt32x4 x y mask) + // match: (MaxMaskedInt32x4 x y mask) // result: (VPMAXSDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 @@ -40789,12 +40833,12 @@ func 
rewriteValueAMD64_OpMaskedMaxInt32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt32x8(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt32x8 x y mask) + // match: (MaxMaskedInt32x8 x y mask) // result: (VPMAXSDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 @@ -40807,12 +40851,12 @@ func rewriteValueAMD64_OpMaskedMaxInt32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt64x2(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt64x2 x y mask) + // match: (MaxMaskedInt64x2 x y mask) // result: (VPMAXSQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 @@ -40825,12 +40869,12 @@ func rewriteValueAMD64_OpMaskedMaxInt64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt64x4(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt64x4 x y mask) + // match: (MaxMaskedInt64x4 x y mask) // result: (VPMAXSQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 @@ -40843,12 +40887,12 @@ func rewriteValueAMD64_OpMaskedMaxInt64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt64x8(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt64x8 x y mask) + // match: (MaxMaskedInt64x8 x y mask) // result: (VPMAXSQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 @@ -40861,12 +40905,12 @@ func rewriteValueAMD64_OpMaskedMaxInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt8x16(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt8x16 x y mask) + // match: (MaxMaskedInt8x16 x y mask) // result: (VPMAXSBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 @@ -40879,12 +40923,12 @@ func rewriteValueAMD64_OpMaskedMaxInt8x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt8x32(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt8x32 x y mask) + // match: (MaxMaskedInt8x32 x y mask) // result: (VPMAXSBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 @@ -40897,12 +40941,12 @@ func rewriteValueAMD64_OpMaskedMaxInt8x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxInt8x64(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedInt8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxInt8x64 x y mask) + // match: (MaxMaskedInt8x64 x y mask) // result: (VPMAXSBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 @@ -40915,12 +40959,12 @@ func rewriteValueAMD64_OpMaskedMaxInt8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxUint16x16(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxUint16x16 x y mask) + // match: (MaxMaskedUint16x16 x y mask) // result: (VPMAXUWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 @@ -40933,12 +40977,12 @@ func rewriteValueAMD64_OpMaskedMaxUint16x16(v *Value) 
bool { return true } } -func rewriteValueAMD64_OpMaskedMaxUint16x32(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxUint16x32 x y mask) + // match: (MaxMaskedUint16x32 x y mask) // result: (VPMAXUWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 @@ -40951,12 +40995,12 @@ func rewriteValueAMD64_OpMaskedMaxUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxUint16x8(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxUint16x8 x y mask) + // match: (MaxMaskedUint16x8 x y mask) // result: (VPMAXUWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 @@ -40969,12 +41013,12 @@ func rewriteValueAMD64_OpMaskedMaxUint16x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxUint32x16(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxUint32x16 x y mask) + // match: (MaxMaskedUint32x16 x y mask) // result: (VPMAXUDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 @@ -40987,12 +41031,12 @@ func rewriteValueAMD64_OpMaskedMaxUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxUint32x4(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxUint32x4 x y mask) + // match: (MaxMaskedUint32x4 x y mask) // result: (VPMAXUDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 @@ -41005,12 +41049,12 @@ func rewriteValueAMD64_OpMaskedMaxUint32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxUint32x8(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMaxUint32x8 x y mask) + // match: (MaxMaskedUint32x8 x y mask) // result: (VPMAXUDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 @@ -41023,12843 +41067,12799 @@ func rewriteValueAMD64_OpMaskedMaxUint32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpMaskedMaxUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMaxUint64x2 x y mask) - // result: (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMaxUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMaxUint64x4 x y mask) - // result: (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMaxUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMaxUint64x8 x y mask) - // result: (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - 
return true - } -} -func rewriteValueAMD64_OpMaskedMaxUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMaxUint8x16 x y mask) - // result: (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMaxUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMaxUint8x32 x y mask) - // result: (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMaxUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMaxUint8x64 x y mask) - // result: (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMAXUBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinFloat32x16 x y mask) - // result: (VMINPSMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinFloat32x4 x y mask) - // result: (VMINPSMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinFloat32x8 x y mask) - // result: (VMINPSMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinFloat64x2 x y mask) - // result: (VMINPDMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinFloat64x4 x y mask) - // result: (VMINPDMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpMaskedMinFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinFloat64x8 x y mask) - // result: (VMINPDMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VMINPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinInt16x16 x y mask) - // result: (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinInt16x32 x y mask) - // result: (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinInt16x8 x y mask) - // result: (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinInt32x16 x y mask) - // result: (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinInt32x4 x y mask) - // result: (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinInt32x8 x y mask) - // result: (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedMinInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMinInt64x2 x y mask) - // result: (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINSQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func 
rewriteValueAMD64_OpMaskedMinInt64x4(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinInt64x4 x y mask) - // result: (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (MaxMaskedUint64x2 x y mask) + // result: (VPMAXUQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINSQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinInt64x8(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinInt64x8 x y mask) - // result: (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (MaxMaskedUint64x4 x y mask) + // result: (VPMAXUQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINSQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinInt8x16(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinInt8x16 x y mask) - // result: (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (MaxMaskedUint64x8 x y mask) + // result: (VPMAXUQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinInt8x32(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinInt8x32 x y mask) - // result: (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask)) + // match: (MaxMaskedUint8x16 x y mask) + // result: (VPMAXUBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinInt8x64(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinInt8x64 x y mask) - // result: (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (MaxMaskedUint8x32 x y mask) + // result: (VPMAXUBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint16x16(v *Value) bool { +func rewriteValueAMD64_OpMaxMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := 
v.Block - // match: (MaskedMinUint16x16 x y mask) - // result: (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (MaxMaskedUint8x64 x y mask) + // result: (VPMAXUBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMAXUBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMin32F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint16x32 x y mask) - // result: (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (Min32F x y) + // result: (POR (MINSS (MINSS x y) x) (MINSS x y)) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64POR) + v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t) + v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t) + v1.AddArg2(x, y) + v0.AddArg2(v1, x) + v.AddArg2(v0, v1) return true } } -func rewriteValueAMD64_OpMaskedMinUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMin64F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint16x8 x y mask) - // result: (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (Min64F x y) + // result: (POR (MINSD (MINSD x y) x) (MINSD x y)) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMINUWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64POR) + v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t) + v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t) + v1.AddArg2(x, y) + v0.AddArg2(v1, x) + v.AddArg2(v0, v1) return true } } -func rewriteValueAMD64_OpMaskedMinUint32x16(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint32x16 x y mask) - // result: (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (MinMaskedFloat32x16 x y mask) + // result: (VMINPSMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUDMasked512) + v.reset(OpAMD64VMINPSMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint32x4(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedFloat32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint32x4 x y mask) - // result: (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (MinMaskedFloat32x4 x y mask) + // result: (VMINPSMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUDMasked128) + v.reset(OpAMD64VMINPSMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint32x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedFloat32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint32x8 x y mask) - // result: (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (MinMaskedFloat32x8 x y mask) + // result: 
(VMINPSMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUDMasked256) + v.reset(OpAMD64VMINPSMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint64x2(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedFloat64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint64x2 x y mask) - // result: (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (MinMaskedFloat64x2 x y mask) + // result: (VMINPDMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUQMasked128) + v.reset(OpAMD64VMINPDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint64x4(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint64x4 x y mask) - // result: (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (MinMaskedFloat64x4 x y mask) + // result: (VMINPDMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUQMasked256) + v.reset(OpAMD64VMINPDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint64x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint64x8 x y mask) - // result: (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (MinMaskedFloat64x8 x y mask) + // result: (VMINPDMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUQMasked512) + v.reset(OpAMD64VMINPDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint8x16(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint8x16 x y mask) - // result: (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (MinMaskedInt16x16 x y mask) + // result: (VPMINSWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPMINSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint8x32(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint8x32 x y mask) - // result: (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask)) + // match: (MinMaskedInt16x32 x y mask) + // result: (VPMINSWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPMINSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMinUint8x64(v *Value) bool { +func 
rewriteValueAMD64_OpMinMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMinUint8x64 x y mask) - // result: (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (MinMaskedInt16x8 x y mask) + // result: (VPMINSWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMINUBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v.reset(OpAMD64VPMINSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulByPowOf2Float32x16(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulByPowOf2Float32x16 x y mask) - // result: (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (MinMaskedInt32x16 x y mask) + // result: (VPMINSDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked512) + v.reset(OpAMD64VPMINSDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulByPowOf2Float32x4(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulByPowOf2Float32x4 x y mask) - // result: (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (MinMaskedInt32x4 x y mask) + // result: (VPMINSDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked128) + v.reset(OpAMD64VPMINSDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulByPowOf2Float32x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulByPowOf2Float32x8 x y mask) - // result: (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (MinMaskedInt32x8 x y mask) + // result: (VPMINSDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSCALEFPSMasked256) + v.reset(OpAMD64VPMINSDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulByPowOf2Float64x2(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulByPowOf2Float64x2 x y mask) - // result: (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (MinMaskedInt64x2 x y mask) + // result: (VPMINSQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked128) + v.reset(OpAMD64VPMINSQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulByPowOf2Float64x4(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulByPowOf2Float64x4 x y mask) - // result: (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (MinMaskedInt64x4 x y mask) + // result: (VPMINSQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y 
:= v_1 mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked256) + v.reset(OpAMD64VPMINSQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulByPowOf2Float64x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulByPowOf2Float64x8 x y mask) - // result: (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (MinMaskedInt64x8 x y mask) + // result: (VPMINSQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked512) + v.reset(OpAMD64VPMINSQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulEvenWidenInt64x2(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulEvenWidenInt64x2 x y mask) - // result: (VPMULDQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (MinMaskedInt8x16 x y mask) + // result: (VPMINSBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMINSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulEvenWidenInt64x4(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulEvenWidenInt64x4 x y mask) - // result: (VPMULDQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (MinMaskedInt8x32 x y mask) + // result: (VPMINSBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMINSBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulEvenWidenInt64x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedInt8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulEvenWidenInt64x8 x y mask) - // result: (VPMULDQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (MinMaskedInt8x64 x y mask) + // result: (VPMINSBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPMINSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulEvenWidenUint64x2(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulEvenWidenUint64x2 x y mask) - // result: (VPMULUDQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (MinMaskedUint16x16 x y mask) + // result: (VPMINUWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULUDQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMINUWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, 
types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulEvenWidenUint64x4(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulEvenWidenUint64x4 x y mask) - // result: (VPMULUDQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (MinMaskedUint16x32 x y mask) + // result: (VPMINUWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULUDQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMINUWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulEvenWidenUint64x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulEvenWidenUint64x8 x y mask) - // result: (VPMULUDQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (MinMaskedUint16x8 x y mask) + // result: (VPMINUWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULUDQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPMINUWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulFloat32x16 x y mask) - // result: (VMULPSMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (MinMaskedUint32x16 x y mask) + // result: (VPMINUDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VMULPSMasked512) + v.reset(OpAMD64VPMINUDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulFloat32x4 x y mask) - // result: (VMULPSMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (MinMaskedUint32x4 x y mask) + // result: (VPMINUDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VMULPSMasked128) + v.reset(OpAMD64VPMINUDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulFloat32x8 x y mask) - // result: (VMULPSMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (MinMaskedUint32x8 x y mask) + // result: (VPMINUDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VMULPSMasked256) + v.reset(OpAMD64VPMINUDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulFloat64x2 x y mask) - // result: 
(VMULPDMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (MinMaskedUint64x2 x y mask) + // result: (VPMINUQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VMULPDMasked128) + v.reset(OpAMD64VPMINUQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulFloat64x4 x y mask) - // result: (VMULPDMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (MinMaskedUint64x4 x y mask) + // result: (VPMINUQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VMULPDMasked256) + v.reset(OpAMD64VPMINUQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulFloat64x8 x y mask) - // result: (VMULPDMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (MinMaskedUint64x8 x y mask) + // result: (VPMINUQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VMULPDMasked512) + v.reset(OpAMD64VPMINUQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulHighInt16x16(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulHighInt16x16 x y mask) - // result: (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (MinMaskedUint8x16 x y mask) + // result: (VPMINUBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULHWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPMINUBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulHighInt16x32(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulHighInt16x32 x y mask) - // result: (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (MinMaskedUint8x32 x y mask) + // result: (VPMINUBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULHWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v.reset(OpAMD64VPMINUBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulHighInt16x8(v *Value) bool { +func rewriteValueAMD64_OpMinMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulHighInt16x8 x y mask) - // result: (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (MinMaskedUint8x64 x y mask) + // result: (VPMINUBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMULHWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPMINUBMasked512) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedMulHighUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMod16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulHighUint16x16 x y mask) - // result: (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask)) + typ := &b.Func.Config.Types + // match: (Mod16 [a] x y) + // result: (Select1 (DIVW [a] x y)) for { + a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMulHighUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMod16u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulHighUint16x32 x y mask) - // result: (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask)) + typ := &b.Func.Config.Types + // match: (Mod16u x y) + // result: (Select1 (DIVWU x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMulHighUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMod32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulHighUint16x8 x y mask) - // result: (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask)) + typ := &b.Func.Config.Types + // match: (Mod32 [a] x y) + // result: (Select1 (DIVL [a] x y)) for { + a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULHUWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMulLowInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMod32u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulLowInt16x16 x y mask) - // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) + typ := &b.Func.Config.Types + // match: (Mod32u x y) + // result: (Select1 (DIVLU x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32)) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMulLowInt16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMod64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulLowInt16x32 x y mask) - // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) + typ := &b.Func.Config.Types + // match: (Mod64 [a] x y) + // result: (Select1 (DIVQ [a] x y)) for { + a := auxIntToBool(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked512) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64)) + v0.AuxInt = boolToAuxInt(a) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMulLowInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMod64u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulLowInt16x8 x y mask) - // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) + typ := &b.Func.Config.Types + // match: (Mod64u x y) + // result: (Select1 (DIVQU x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64)) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMulLowInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMod8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulLowInt32x16 x y mask) - // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (Mod8 x y) + // result: (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y))) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) + v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) + v2.AddArg(y) + v0.AddArg2(v1, v2) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMulLowInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpMod8u(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulLowInt32x4 x y mask) - // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (Mod8u x y) + // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpSelect1) + v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) + v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) + v2.AddArg(y) + v0.AddArg2(v1, v2) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedMulLowInt32x8(v *Value) bool { +func rewriteValueAMD64_OpMove(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedMulLowInt32x8 x y mask) - // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (Move [0] _ _ mem) + // result: mem for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if auxIntToInt64(v.AuxInt) != 0 { + break + } + mem := v_2 + v.copyOf(mem) + return true + } + // match: (Move [1] dst src mem) + // result: (MOVBstore dst (MOVBload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 1 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + 
v.reset(OpAMD64MOVBstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [2] dst src mem) + // result: (MOVWstore dst (MOVWload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 2 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVWstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [4] dst src mem) + // result: (MOVLstore dst (MOVLload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 4 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [8] dst src mem) + // result: (MOVQstore dst (MOVQload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 8 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVQstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [16] dst src mem) + // result: (MOVOstore dst (MOVOload src mem) mem) + for { + if auxIntToInt64(v.AuxInt) != 16 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVOstore) + v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128) + v0.AddArg2(src, mem) + v.AddArg3(dst, v0, mem) + return true + } + // match: (Move [32] dst src mem) + // result: (Move [16] (OffPtr dst [16]) (OffPtr src [16]) (Move [16] dst src mem)) + for { + if auxIntToInt64(v.AuxInt) != 32 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(16) + v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) + v0.AuxInt = int64ToAuxInt(16) + v0.AddArg(dst) + v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) + v1.AuxInt = int64ToAuxInt(16) + v1.AddArg(src) + v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem) + v2.AuxInt = int64ToAuxInt(16) + v2.AddArg3(dst, src, mem) + v.AddArg3(v0, v1, v2) + return true + } + // match: (Move [48] dst src mem) + // result: (Move [32] (OffPtr dst [16]) (OffPtr src [16]) (Move [16] dst src mem)) + for { + if auxIntToInt64(v.AuxInt) != 48 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(32) + v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) + v0.AuxInt = int64ToAuxInt(16) + v0.AddArg(dst) + v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) + v1.AuxInt = int64ToAuxInt(16) + v1.AddArg(src) + v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem) + v2.AuxInt = int64ToAuxInt(16) + v2.AddArg3(dst, src, mem) + v.AddArg3(v0, v1, v2) + return true + } + // match: (Move [64] dst src mem) + // result: (Move [32] (OffPtr dst [32]) (OffPtr src [32]) (Move [32] dst src mem)) + for { + if auxIntToInt64(v.AuxInt) != 64 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(32) + v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) + v0.AuxInt = int64ToAuxInt(32) + v0.AddArg(dst) + v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) + v1.AuxInt = int64ToAuxInt(32) + v1.AddArg(src) + v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem) + v2.AuxInt = int64ToAuxInt(32) + v2.AddArg3(dst, src, mem) + v.AddArg3(v0, v1, v2) + return true + } + // match: (Move [3] dst src mem) + // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem)) + for { + if auxIntToInt64(v.AuxInt) != 3 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + 
v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) + v0.AuxInt = int32ToAuxInt(2) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVWstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedMulLowInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMulLowInt64x2 x y mask) - // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (Move [5] dst src mem) + // result: (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if auxIntToInt64(v.AuxInt) != 5 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(4) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) + v0.AuxInt = int32ToAuxInt(4) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedMulLowInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMulLowInt64x4 x y mask) - // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (Move [6] dst src mem) + // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if auxIntToInt64(v.AuxInt) != 6 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(4) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) + v0.AuxInt = int32ToAuxInt(4) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedMulLowInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedMulLowInt64x8 x y mask) - // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (Move [7] dst src mem) + // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMULLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if auxIntToInt64(v.AuxInt) != 7 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(3) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(3) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualFloat32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block 
- typ := &b.Func.Config.Types - // match: (MaskedNotEqualFloat32x16 x y mask) - // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask))) + // match: (Move [9] dst src mem) + // result: (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if auxIntToInt64(v.AuxInt) != 9 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVBstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) + v0.AuxInt = int32ToAuxInt(8) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualFloat32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualFloat32x4 x y mask) - // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) + // match: (Move [10] dst src mem) + // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if auxIntToInt64(v.AuxInt) != 10 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVWstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) + v0.AuxInt = int32ToAuxInt(8) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualFloat32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualFloat32x8 x y mask) - // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) + // match: (Move [11] dst src mem) + // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if auxIntToInt64(v.AuxInt) != 11 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(7) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(7) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualFloat64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := 
&b.Func.Config.Types - // match: (MaskedNotEqualFloat64x2 x y mask) - // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM mask))) + // match: (Move [12] dst src mem) + // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + if auxIntToInt64(v.AuxInt) != 12 { + break + } + dst := v_0 + src := v_1 + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = int32ToAuxInt(8) + v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = int32ToAuxInt(8) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualFloat64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualFloat64x4 x y mask) - // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask))) + // match: (Move [s] dst src mem) + // cond: s >= 13 && s <= 15 + // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s >= 13 && s <= 15) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = int32ToAuxInt(int32(s - 8)) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v0.AuxInt = int32ToAuxInt(int32(s - 8)) + v0.AddArg2(src, mem) + v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) + v2.AddArg2(src, mem) + v1.AddArg3(dst, v2, mem) + v.AddArg3(dst, v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualFloat64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualFloat64x8 x y mask) - // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask))) + // match: (Move [s] dst src mem) + // cond: s > 16 && s%16 != 0 && s%16 <= 8 + // result: (Move [s-s%16] (OffPtr dst [s%16]) (OffPtr src [s%16]) (MOVQstore dst (MOVQload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s > 16 && s%16 != 0 && s%16 <= 8) { + break + } + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(s - s%16) + v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) + v0.AuxInt = int64ToAuxInt(s % 16) + v0.AddArg(dst) + v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) + v1.AuxInt = int64ToAuxInt(s % 16) + v1.AddArg(src) + v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) + v3 := b.NewValue0(v.Pos, 
OpAMD64MOVQload, typ.UInt64) + v3.AddArg2(src, mem) + v2.AddArg3(dst, v3, mem) + v.AddArg3(v0, v1, v2) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask))) + // match: (Move [s] dst src mem) + // cond: s > 16 && s%16 != 0 && s%16 > 8 + // result: (Move [s-s%16] (OffPtr dst [s%16]) (OffPtr src [s%16]) (MOVOstore dst (MOVOload src mem) mem)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s > 16 && s%16 != 0 && s%16 > 8) { + break + } + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(s - s%16) + v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) + v0.AuxInt = int64ToAuxInt(s % 16) + v0.AddArg(dst) + v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) + v1.AuxInt = int64ToAuxInt(s % 16) + v1.AddArg(src) + v2 := b.NewValue0(v.Pos, OpAMD64MOVOstore, types.TypeMem) + v3 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128) + v3.AddArg2(src, mem) + v2.AddArg3(dst, v3, mem) + v.AddArg3(v0, v1, v2) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask))) + // match: (Move [s] dst src mem) + // cond: s > 64 && s <= 16*64 && s%16 == 0 && logLargeCopy(v, s) + // result: (DUFFCOPY [s] dst src mem) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s > 64 && s <= 16*64 && s%16 == 0 && logLargeCopy(v, s)) { + break + } + v.reset(OpAMD64DUFFCOPY) + v.AuxInt = int64ToAuxInt(s) + v.AddArg3(dst, src, mem) return true } -} -func rewriteValueAMD64_OpMaskedNotEqualInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask))) + // match: (Move [s] dst src mem) + // cond: s > 16*64 && s%8 == 0 && logLargeCopy(v, s) + // result: (REPMOVSQ dst src (MOVQconst [s/8]) mem) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + s := auxIntToInt64(v.AuxInt) + dst := v_0 + src := v_1 + mem := v_2 + if !(s > 16*64 && s%8 == 0 && logLargeCopy(v, s)) { + break + } + v.reset(OpAMD64REPMOVSQ) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(s / 8) + v.AddArg4(dst, src, v0, mem) return true } + return false } -func 
rewriteValueAMD64_OpMaskedNotEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask))) + // match: (MulByPowOf2MaskedFloat32x16 x y mask) + // result: (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VSCALEFPSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualInt32x4(v *Value) bool { +func rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask))) + // match: (MulByPowOf2MaskedFloat32x4 x y mask) + // result: (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VSCALEFPSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualInt32x8(v *Value) bool { +func rewriteValueAMD64_OpMulByPowOf2MaskedFloat32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask))) + // match: (MulByPowOf2MaskedFloat32x8 x y mask) + // result: (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VSCALEFPSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualInt64x2(v *Value) bool { +func rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask))) + // match: (MulByPowOf2MaskedFloat64x2 x y mask) + // result: (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VSCALEFPDMasked128) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualInt64x4(v *Value) bool { +func rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask))) + // match: (MulByPowOf2MaskedFloat64x4 x y mask) + // result: (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VSCALEFPDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpMulByPowOf2MaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask))) + // match: (MulByPowOf2MaskedFloat64x8 x y mask) + // result: (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VSCALEFPDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualInt8x16(v *Value) bool { +func rewriteValueAMD64_OpMulEvenWidenMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask))) + // match: (MulEvenWidenMaskedInt64x2 x y mask) + // result: (VPMULDQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULDQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualInt8x32(v *Value) bool { +func rewriteValueAMD64_OpMulEvenWidenMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask))) + // match: (MulEvenWidenMaskedInt64x4 x y mask) + // result: (VPMULDQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - 
v.AddArg(v0) + v.reset(OpAMD64VPMULDQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpMulEvenWidenMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualInt8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask))) + // match: (MulEvenWidenMaskedInt64x8 x y mask) + // result: (VPMULDQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULDQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint16x16(v *Value) bool { +func rewriteValueAMD64_OpMulEvenWidenMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint16x16 x y mask) - // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask))) + // match: (MulEvenWidenMaskedUint64x2 x y mask) + // result: (VPMULUDQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULUDQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpMulEvenWidenMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint16x32 x y mask) - // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM mask))) + // match: (MulEvenWidenMaskedUint64x4 x y mask) + // result: (VPMULUDQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULUDQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint16x8(v *Value) bool { +func rewriteValueAMD64_OpMulEvenWidenMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint16x8 x y mask) - // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask))) + // match: (MulEvenWidenMaskedUint64x8 x y mask) + // result: (VPMULUDQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULUDQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpMulHighMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint32x16 x y mask) - // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask))) + // match: (MulHighMaskedInt16x16 x y mask) + // result: (VPMULHWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULHWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint32x4(v *Value) bool { +func rewriteValueAMD64_OpMulHighMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint32x4 x y mask) - // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM mask))) + // match: (MulHighMaskedInt16x32 x y mask) + // result: (VPMULHWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULHWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint32x8(v *Value) bool { +func rewriteValueAMD64_OpMulHighMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint32x8 x y mask) - // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask))) + // match: (MulHighMaskedInt16x8 x y mask) + // result: (VPMULHWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULHWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint64x2(v *Value) bool { +func rewriteValueAMD64_OpMulHighMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint64x2 x y mask) - // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask))) + // match: (MulHighMaskedUint16x16 x y mask) + // result: (VPMULHUWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPCMPUQMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULHUWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint64x4(v *Value) bool { +func rewriteValueAMD64_OpMulHighMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint64x4 x y mask) - // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask))) + // match: (MulHighMaskedUint16x32 x y mask) + // result: (VPMULHUWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULHUWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint64x8(v *Value) bool { +func rewriteValueAMD64_OpMulHighMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint64x8 x y mask) - // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) + // match: (MulHighMaskedUint16x8 x y mask) + // result: (VPMULHUWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULHUWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint8x16(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint8x16 x y mask) - // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask))) + // match: (MulLowMaskedInt16x16 x y mask) + // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULLWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint8x32(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint8x32 x y mask) - // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask))) + // match: (MulLowMaskedInt16x32 x y mask) + // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - 
v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULLWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedNotEqualUint8x64(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (MaskedNotEqualUint8x64 x y mask) - // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask))) + // match: (MulLowMaskedInt16x8 x y mask) + // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v1.AddArg(mask) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + v.reset(OpAMD64VPMULLWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrInt32x16(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrInt32x16 x y mask) - // result: (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (MulLowMaskedInt32x16 x y mask) + // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORDMasked512) + v.reset(OpAMD64VPMULLDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrInt32x4(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrInt32x4 x y mask) - // result: (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (MulLowMaskedInt32x4 x y mask) + // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORDMasked128) + v.reset(OpAMD64VPMULLDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrInt32x8(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrInt32x8 x y mask) - // result: (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (MulLowMaskedInt32x8 x y mask) + // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORDMasked256) + v.reset(OpAMD64VPMULLDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrInt64x2(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrInt64x2 x y mask) - // result: (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (MulLowMaskedInt64x2 x y mask) + // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - 
v.reset(OpAMD64VPORQMasked128) + v.reset(OpAMD64VPMULLQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrInt64x4(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrInt64x4 x y mask) - // result: (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (MulLowMaskedInt64x4 x y mask) + // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORQMasked256) + v.reset(OpAMD64VPMULLQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrInt64x8(v *Value) bool { +func rewriteValueAMD64_OpMulLowMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrInt64x8 x y mask) - // result: (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (MulLowMaskedInt64x8 x y mask) + // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORQMasked512) + v.reset(OpAMD64VPMULLQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrUint32x16(v *Value) bool { +func rewriteValueAMD64_OpMulMaskedFloat32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrUint32x16 x y mask) - // result: (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (MulMaskedFloat32x16 x y mask) + // result: (VMULPSMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORDMasked512) + v.reset(OpAMD64VMULPSMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrUint32x4(v *Value) bool { +func rewriteValueAMD64_OpMulMaskedFloat32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrUint32x4 x y mask) - // result: (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (MulMaskedFloat32x4 x y mask) + // result: (VMULPSMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORDMasked128) + v.reset(OpAMD64VMULPSMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrUint32x8(v *Value) bool { +func rewriteValueAMD64_OpMulMaskedFloat32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrUint32x8 x y mask) - // result: (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (MulMaskedFloat32x8 x y mask) + // result: (VMULPSMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORDMasked256) + v.reset(OpAMD64VMULPSMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrUint64x2(v *Value) bool { +func rewriteValueAMD64_OpMulMaskedFloat64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrUint64x2 x y mask) - // result: (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (MulMaskedFloat64x2 x y mask) + // result: 
(VMULPDMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORQMasked128) + v.reset(OpAMD64VMULPDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrUint64x4(v *Value) bool { +func rewriteValueAMD64_OpMulMaskedFloat64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrUint64x4 x y mask) - // result: (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (MulMaskedFloat64x4 x y mask) + // result: (VMULPDMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORQMasked256) + v.reset(OpAMD64VMULPDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedOrUint64x8(v *Value) bool { +func rewriteValueAMD64_OpMulMaskedFloat64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedOrUint64x8 x y mask) - // result: (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (MulMaskedFloat64x8 x y mask) + // result: (VMULPDMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPORQMasked512) + v.reset(OpAMD64VMULPDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpNeg32F(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Neg32F x) + // result: (PXOR x (MOVSSconst [float32(math.Copysign(0, -1))])) + for { + x := v_0 + v.reset(OpAMD64PXOR) + v0 := b.NewValue0(v.Pos, OpAMD64MOVSSconst, typ.Float32) + v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1))) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpNeg64F(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Neg64F x) + // result: (PXOR x (MOVSDconst [math.Copysign(0, -1)])) + for { + x := v_0 + v.reset(OpAMD64PXOR) + v0 := b.NewValue0(v.Pos, OpAMD64MOVSDconst, typ.Float64) + v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1)) + v.AddArg2(x, v0) + return true + } +} +func rewriteValueAMD64_OpNeq16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPairDotProdAccumulateInt32x16 x y z mask) - // result: (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (Neq16 x y) + // result: (SETNE (CMPW x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpNeq32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPairDotProdAccumulateInt32x4 x y z mask) - // result: (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (Neq32 x y) + // result: (SETNE (CMPL x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - 
v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPairDotProdAccumulateInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpNeq32F(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPairDotProdAccumulateInt32x8 x y z mask) - // result: (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (Neq32F x y) + // result: (SETNEF (UCOMISS x y)) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64SETNEF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPairDotProdInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpNeq64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPairDotProdInt16x16 x y mask) - // result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (Neq64 x y) + // result: (SETNE (CMPQ x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeq64F(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Neq64F x y) + // result: (SETNEF (UCOMISD x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNEF) + v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeq8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (Neq8 x y) + // result: (SETNE (CMPB x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeqB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (NeqB x y) + // result: (SETNE (CMPB x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNeqPtr(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (NeqPtr x y) + // result: (SETNE (CMPQ x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpNot(v *Value) bool { + v_0 := v.Args[0] + // match: (Not x) + // result: (XORLconst [1] x) + for { + x := v_0 + v.reset(OpAMD64XORLconst) + v.AuxInt = int32ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualFloat32x16 x y) + // result: (VPMOVMToVec32x16 (VCMPPS512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} +func 
rewriteValueAMD64_OpNotEqualFloat32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (NotEqualFloat32x4 x y) + // result: (VCMPPS128 [4] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS128) + v.AuxInt = int8ToAuxInt(4) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (NotEqualFloat32x8 x y) + // result: (VCMPPS256 [4] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPS256) + v.AuxInt = int8ToAuxInt(4) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (NotEqualFloat64x2 x y) + // result: (VCMPPD128 [4] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD128) + v.AuxInt = int8ToAuxInt(4) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (NotEqualFloat64x4 x y) + // result: (VCMPPD256 [4] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VCMPPD256) + v.AuxInt = int8ToAuxInt(4) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (NotEqualFloat64x8 x y) + // result: (VPMOVMToVec64x8 (VCMPPD512 [4] x y)) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPairDotProdInt16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpNotEqualInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPairDotProdInt16x32 x y mask) - // result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPW256 [4] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPairDotProdInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPairDotProdInt16x8 x y mask) - // result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPW512 [4] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDWDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt16x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt16x16 x mask) - // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPW128 [4] x y)) for { x := v_0 - 
mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt16x32(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt16x32 x mask) - // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPD512 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt16x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt16x8 x mask) - // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPD128 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt32x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt32x16 x mask) - // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPD256 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt32x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt32x4 x mask) - // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPQ128 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt32x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt32x8 x mask) - // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM 
mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPQ256 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt64x2(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt64x2 x mask) - // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPQ512 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt64x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt64x4 x mask) - // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPB128 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt64x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt64x8 x mask) - // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPB256 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt8x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt8x16 x mask) - // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualInt8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPB512 [4] x y)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt8x32(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v *Value) bool { + v_2 
:= v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt8x32 x mask) - // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedFloat32x16 x y mask) + // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountInt8x64(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountInt8x64 x mask) - // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedFloat32x4 x y mask) + // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint16x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint16x16 x mask) - // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedFloat32x8 x y mask) + // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint16x32(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint16x32 x mask) - // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedFloat64x2 x y mask) + // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func 
rewriteValueAMD64_OpMaskedPopCountUint16x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint16x8 x mask) - // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedFloat64x4 x y mask) + // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint32x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint32x16 x mask) - // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedFloat64x8 x y mask) + // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint32x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint32x4 x mask) - // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint32x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint32x8 x mask) - // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint64x2(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint64x2 x mask) - // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint64x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint64x4 x mask) - // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint64x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint64x8 x mask) - // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint8x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint8x16 x mask) - // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint8x32(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint8x32 x mask) - // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedPopCountUint8x64(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedPopCountUint8x64 x mask) - // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM mask))) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftInt32x16 [a] x mask) - // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftInt32x4 [a] x mask) - // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask 
:= v_1 - v.reset(OpAMD64VPROLDMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftInt32x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftInt32x8 [a] x mask) - // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x2(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedInt8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftInt64x2 [a] x mask) - // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedInt8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftInt64x4 [a] x mask) - // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint16x16 x y mask) + // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftInt64x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint16x32(v *Value) 
bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftInt64x8 [a] x mask) - // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint16x32 x y mask) + // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftUint32x16 [a] x mask) - // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint16x8 x y mask) + // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftUint32x4 [a] x mask) - // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint32x16 x y mask) + // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftUint32x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftUint32x8 [a] x mask) - // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint32x4 x y mask) + // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLDMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y 
:= v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x2(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftUint64x2 [a] x mask) - // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint32x8 x y mask) + // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftUint64x4 [a] x mask) - // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint64x2 x y mask) + // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllLeftUint64x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllLeftUint64x8 [a] x mask) - // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint64x4 x y mask) + // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPROLQMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightInt32x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightInt32x16 [a] x mask) - // result: (VPRORDMasked512 [a] x 
(VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint64x8 x y mask) + // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightInt32x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightInt32x4 [a] x mask) - // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint8x16 x y mask) + // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightInt32x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightInt32x8 [a] x mask) - // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint8x32 x y mask) + // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightInt64x2(v *Value) bool { +func rewriteValueAMD64_OpNotEqualMaskedUint8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightInt64x2 [a] x mask) - // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualMaskedUint8x64 x y mask) + // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM mask))) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v1 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v1.AddArg(mask) + v0.AddArg3(x, y, v1) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightInt64x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightInt64x4 [a] x mask) - // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint16x16 x y) + // result: (VPMOVMToVec16x16 (VPCMPUW256 [4] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightInt64x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightInt64x8 [a] x mask) - // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint16x32 x y) + // result: (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightUint32x16(v *Value) bool { +func rewriteValueAMD64_OpNotEqualUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightUint32x16 [a] x mask) - // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint16x8 x y) + // result: (VPMOVMToVec16x8 (VPCMPUW128 [4] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec16x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightUint32x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightUint32x4 [a] x mask) - // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint32x16 x y) + // result: (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightUint32x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := 
v.Args[0] b := v.Block - // match: (MaskedRotateAllRightUint32x8 [a] x mask) - // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint32x4 x y) + // result: (VPMOVMToVec32x4 (VPCMPUD128 [4] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORDMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightUint64x2(v *Value) bool { +func rewriteValueAMD64_OpNotEqualUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightUint64x2 [a] x mask) - // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint32x8 x y) + // result: (VPMOVMToVec32x8 (VPCMPUD256 [4] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec32x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightUint64x4(v *Value) bool { +func rewriteValueAMD64_OpNotEqualUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightUint64x4 [a] x mask) - // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint64x2 x y) + // result: (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x2) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateAllRightUint64x8(v *Value) bool { +func rewriteValueAMD64_OpNotEqualUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateAllRightUint64x8 [a] x mask) - // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint64x4 x y) + // result: (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - mask := v_1 - v.reset(OpAMD64VPRORQMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPMOVMToVec64x4) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftInt32x16 x y mask) - // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint64x8 x y) + // result: (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) for { x := v_0 y := v_1 
- mask := v_2 - v.reset(OpAMD64VPROLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec64x8) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpNotEqualUint8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftInt32x4 x y mask) - // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint8x16 x y) + // result: (VPMOVMToVec8x16 (VPCMPUB128 [4] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x16) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftInt32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpNotEqualUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftInt32x8 x y mask) - // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint8x32 x y) + // result: (VPMOVMToVec8x32 (VPCMPUB256 [4] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x32) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftInt64x2 x y mask) - // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (NotEqualUint8x64 x y) + // result: (VPMOVMToVec8x64 (VPCMPUB512 [4] x y)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPMOVMToVec8x64) + v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) + v0.AuxInt = int8ToAuxInt(4) + v0.AddArg2(x, y) + v.AddArg(v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpOffPtr(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftInt64x4 x y mask) - // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) + typ := &b.Func.Config.Types + // match: (OffPtr [off] ptr) + // cond: is32Bit(off) + // result: (ADDQconst [int32(off)] ptr) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + off := auxIntToInt64(v.AuxInt) + ptr := v_0 + if !(is32Bit(off)) { + break + } + v.reset(OpAMD64ADDQconst) + v.AuxInt = int32ToAuxInt(int32(off)) + v.AddArg(ptr) return true } -} -func rewriteValueAMD64_OpMaskedRotateLeftInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // 
match: (MaskedRotateLeftInt64x8 x y mask) - // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (OffPtr [off] ptr) + // result: (ADDQ (MOVQconst [off]) ptr) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPROLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + off := auxIntToInt64(v.AuxInt) + ptr := v_0 + v.reset(OpAMD64ADDQ) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(off) + v.AddArg2(v0, ptr) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftUint32x16(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftUint32x16 x y mask) - // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (OrMaskedInt32x16 x y mask) + // result: (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPROLVDMasked512) + v.reset(OpAMD64VPORDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftUint32x4(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftUint32x4 x y mask) - // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (OrMaskedInt32x4 x y mask) + // result: (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPROLVDMasked128) + v.reset(OpAMD64VPORDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftUint32x8(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftUint32x8 x y mask) - // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (OrMaskedInt32x8 x y mask) + // result: (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPROLVDMasked256) + v.reset(OpAMD64VPORDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftUint64x2(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftUint64x2 x y mask) - // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (OrMaskedInt64x2 x y mask) + // result: (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPROLVQMasked128) + v.reset(OpAMD64VPORQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftUint64x4(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftUint64x4 x y mask) - // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (OrMaskedInt64x4 x y mask) + // result: (VPORQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPROLVQMasked256) + v.reset(OpAMD64VPORQMasked256) v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateLeftUint64x8(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateLeftUint64x8 x y mask) - // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (OrMaskedInt64x8 x y mask) + // result: (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPROLVQMasked512) + v.reset(OpAMD64VPORQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightInt32x16(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightInt32x16 x y mask) - // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (OrMaskedUint32x16 x y mask) + // result: (VPORDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPRORVDMasked512) + v.reset(OpAMD64VPORDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightInt32x4(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightInt32x4 x y mask) - // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (OrMaskedUint32x4 x y mask) + // result: (VPORDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPRORVDMasked128) + v.reset(OpAMD64VPORDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightInt32x8(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightInt32x8 x y mask) - // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (OrMaskedUint32x8 x y mask) + // result: (VPORDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPRORVDMasked256) + v.reset(OpAMD64VPORDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightInt64x2(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightInt64x2 x y mask) - // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (OrMaskedUint64x2 x y mask) + // result: (VPORQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPRORVQMasked128) + v.reset(OpAMD64VPORQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightInt64x4(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightInt64x4 x y mask) - // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (OrMaskedUint64x4 x y mask) + // result: (VPORQMasked256 x y 
(VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPRORVQMasked256) + v.reset(OpAMD64VPORQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightInt64x8(v *Value) bool { +func rewriteValueAMD64_OpOrMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightInt64x8 x y mask) - // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (OrMaskedUint64x8 x y mask) + // result: (VPORQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPRORVQMasked512) + v.reset(OpAMD64VPORQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightUint32x16(v *Value) bool { +func rewriteValueAMD64_OpPairDotProdAccumulateMaskedInt32x16(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightUint32x16 x y mask) - // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (PairDotProdAccumulateMaskedInt32x16 x y z mask) + // result: (VPDPWSSDMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked512) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPWSSDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightUint32x4(v *Value) bool { +func rewriteValueAMD64_OpPairDotProdAccumulateMaskedInt32x4(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightUint32x4 x y mask) - // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (PairDotProdAccumulateMaskedInt32x4 x y z mask) + // result: (VPDPWSSDMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked128) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPWSSDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightUint32x8(v *Value) bool { +func rewriteValueAMD64_OpPairDotProdAccumulateMaskedInt32x8(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightUint32x8 x y mask) - // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (PairDotProdAccumulateMaskedInt32x8 x y z mask) + // result: (VPDPWSSDMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPRORVDMasked256) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPWSSDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightUint64x2(v *Value) bool { +func rewriteValueAMD64_OpPairDotProdMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightUint64x2 x y mask) - // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (PairDotProdMaskedInt16x16 x y mask) + // result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - 
v.reset(OpAMD64VPRORVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMADDWDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightUint64x4(v *Value) bool { +func rewriteValueAMD64_OpPairDotProdMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightUint64x4 x y mask) - // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (PairDotProdMaskedInt16x32 x y mask) + // result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPRORVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMADDWDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRotateRightUint64x8(v *Value) bool { +func rewriteValueAMD64_OpPairDotProdMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRotateRightUint64x8 x y mask) - // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (PairDotProdMaskedInt16x8 x y mask) + // result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPRORVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPMADDWDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpPanicBounds(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (PanicBounds [kind] x y mem) + // cond: boundsABI(kind) == 0 + // result: (LoweredPanicBoundsA [kind] x y mem) + for { + kind := auxIntToInt64(v.AuxInt) + x := v_0 + y := v_1 + mem := v_2 + if !(boundsABI(kind) == 0) { + break + } + v.reset(OpAMD64LoweredPanicBoundsA) + v.AuxInt = int64ToAuxInt(kind) + v.AddArg3(x, y, mem) + return true + } + // match: (PanicBounds [kind] x y mem) + // cond: boundsABI(kind) == 1 + // result: (LoweredPanicBoundsB [kind] x y mem) + for { + kind := auxIntToInt64(v.AuxInt) + x := v_0 + y := v_1 + mem := v_2 + if !(boundsABI(kind) == 1) { + break + } + v.reset(OpAMD64LoweredPanicBoundsB) + v.AuxInt = int64ToAuxInt(kind) + v.AddArg3(x, y, mem) + return true + } + // match: (PanicBounds [kind] x y mem) + // cond: boundsABI(kind) == 2 + // result: (LoweredPanicBoundsC [kind] x y mem) + for { + kind := auxIntToInt64(v.AuxInt) + x := v_0 + y := v_1 + mem := v_2 + if !(boundsABI(kind) == 2) { + break + } + v.reset(OpAMD64LoweredPanicBoundsC) + v.AuxInt = int64ToAuxInt(kind) + v.AddArg3(x, y, mem) + return true + } + return false +} +func rewriteValueAMD64_OpPopCount16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (PopCount16 x) + // result: (POPCNTL (MOVWQZX x)) + for { + x := v_0 + v.reset(OpAMD64POPCNTL) + v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32) + v0.AddArg(x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpPopCount8(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (PopCount8 x) + // result: (POPCNTL (MOVBQZX x)) + for { + x := v_0 + v.reset(OpAMD64POPCNTL) + v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, 
typ.UInt32) + v0.AddArg(x) + v.AddArg(v0) + return true + } +} +func rewriteValueAMD64_OpPopCountMaskedInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRoundWithPrecisionFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) + // match: (PopCountMaskedInt16x16 x mask) + // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpPopCountMaskedInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRoundWithPrecisionFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) + // match: (PopCountMaskedInt16x32 x mask) + // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpPopCountMaskedInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRoundWithPrecisionFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) + // match: (PopCountMaskedInt16x8 x mask) + // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpPopCountMaskedInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRoundWithPrecisionFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) + // match: (PopCountMaskedInt32x16 x mask) + // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpPopCountMaskedInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRoundWithPrecisionFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) + // match: (PopCountMaskedInt32x4 x mask) + // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 mask := v_1 - 
v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpPopCountMaskedInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedRoundWithPrecisionFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) + // match: (PopCountMaskedInt32x8 x mask) + // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = int8ToAuxInt(a + 0) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPOPCNTDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddInt16x16 x y mask) - // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (PopCountMaskedInt64x2 x mask) + // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddInt16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddInt16x32 x y mask) - // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (PopCountMaskedInt64x4 x mask) + // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddInt16x8 x y mask) - // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (PopCountMaskedInt64x8 x mask) + // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddInt8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedInt8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddInt8x16 x y mask) - // result: 
(VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (PopCountMaskedInt8x16 x mask) + // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked128) + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddInt8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedInt8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddInt8x32 x y mask) - // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) + // match: (PopCountMaskedInt8x32 x mask) + // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked256) + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddInt8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddInt8x64 x y mask) - // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (PopCountMaskedInt8x64 x mask) + // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked512) + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddUint16x16 x y mask) - // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (PopCountMaskedUint16x16 x mask) + // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked256) + mask := v_1 + v.reset(OpAMD64VPOPCNTWMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddUint16x32 x y mask) - // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (PopCountMaskedUint16x32 x mask) + // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked512) + mask := v_1 + v.reset(OpAMD64VPOPCNTWMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddUint16x8 x y mask) - // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (PopCountMaskedUint16x8 x mask) + // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM mask)) for { x := v_0 - y := 
v_1 - mask := v_2 - v.reset(OpAMD64VPADDSWMasked128) + mask := v_1 + v.reset(OpAMD64VPOPCNTWMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddUint8x16 x y mask) - // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (PopCountMaskedUint32x16 x mask) + // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddUint8x32 x y mask) - // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) + // match: (PopCountMaskedUint32x4 x mask) + // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedAddUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedAddUint8x64 x y mask) - // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (PopCountMaskedUint32x8 x mask) + // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPADDSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedPairDotProdAccumulateInt32x16 x y z mask) - // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (PopCountMaskedUint64x2 x mask) + // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM mask)) for { x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: 
(MaskedSaturatedPairDotProdAccumulateInt32x4 x y z mask) - // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (PopCountMaskedUint64x4 x mask) + // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM mask)) for { x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedPairDotProdAccumulateInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedPairDotProdAccumulateInt32x8 x y z mask) - // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (PopCountMaskedUint64x8 x mask) + // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM mask)) for { x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPWSSDSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubInt16x16 x y mask) - // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (PopCountMaskedUint8x16 x mask) + // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubInt16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubInt16x32 x y mask) - // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (PopCountMaskedUint8x32 x mask) + // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpPopCountMaskedUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubInt16x8 x y mask) - // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (PopCountMaskedUint8x64 x mask) + // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM mask)) for { x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) 
v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubInt8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftInt32x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedSaturatedSubInt8x16 x y mask) - // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (RotateAllLeftInt32x16 [a] x) + // result: (VPROLD512 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPROLD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubInt8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftInt32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedSaturatedSubInt8x32 x y mask) - // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) + // match: (RotateAllLeftInt32x4 [a] x) + // result: (VPROLD128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPROLD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubInt8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt32x8 [a] x) + // result: (VPROLD256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt64x2 [a] x) + // result: (VPROLQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (RotateAllLeftInt64x4 [a] x) + // result: (VPROLQ256 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPROLQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpRotateAllLeftInt64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedSaturatedSubInt8x64 x y mask) - // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (RotateAllLeftInt64x8 [a] x) + // result: (VPROLQ512 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPROLQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubUint16x16 x y mask) - // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (RotateAllLeftMaskedInt32x16 [a] x mask) + // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked256) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubUint16x32 x y mask) - // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (RotateAllLeftMaskedInt32x4 [a] x mask) + // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubUint16x8 x y mask) - // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (RotateAllLeftMaskedInt32x8 [a] x mask) + // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubUint8x16 x y mask) - // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (RotateAllLeftMaskedInt64x2 [a] x mask) + // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedSubUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubUint8x32 x y mask) - // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) + // match: (RotateAllLeftMaskedInt64x4 [a] x mask) + // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func 
rewriteValueAMD64_OpMaskedSaturatedSubUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedSubUint8x64 x y mask) - // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (RotateAllLeftMaskedInt64x8 [a] x mask) + // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x16 x y mask) - // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (RotateAllLeftMaskedUint32x16 [a] x mask) + // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x32 x y mask) - // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (RotateAllLeftMaskedUint32x4 [a] x mask) + // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedUnsignedSignedPairDotProdUint8x64 x y mask) - // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (RotateAllLeftMaskedUint32x8 [a] x mask) + // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPMADDUBSWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := 
v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) - // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (RotateAllLeftMaskedUint64x2 [a] x mask) + // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) - // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (RotateAllLeftMaskedUint64x4 [a] x mask) + // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) - // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (RotateAllLeftMaskedUint64x8 [a] x mask) + // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + mask := v_1 + v.reset(OpAMD64VPROLQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftUint32x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) - // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (RotateAllLeftUint32x16 [a] x) + // result: (VPROLD512 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VPROLD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4(v *Value) bool { - v_3 := v.Args[3] 
- v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftUint32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) - // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (RotateAllLeftUint32x4 [a] x) + // result: (VPROLD128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VPROLD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftUint32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) - // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (RotateAllLeftUint32x8 [a] x) + // result: (VPROLD256 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VPROLD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftUint64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt16x16 [a] x y mask) - // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + // match: (RotateAllLeftUint64x2 [a] x) + // result: (VPROLQ128 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked256) + v.reset(OpAMD64VPROLQ128) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftUint64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt16x32 [a] x y mask) - // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + // match: (RotateAllLeftUint64x4 [a] x) + // result: (VPROLQ256 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512) + v.reset(OpAMD64VPROLQ256) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllLeftUint64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt16x8 [a] x y mask) - // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + // match: (RotateAllLeftUint64x8 [a] x) + // result: (VPROLQ512 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked128) + v.reset(OpAMD64VPROLQ512) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightInt32x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt32x16 [a] x y mask) - // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + // match: (RotateAllRightInt32x16 [a] x) + // result: (VPRORD512 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512) + v.reset(OpAMD64VPRORD512) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightInt32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt32x4 [a] x y mask) - // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + // match: (RotateAllRightInt32x4 [a] x) + // result: (VPRORD128 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked128) + v.reset(OpAMD64VPRORD128) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightInt32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt32x8 [a] x y mask) - // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + // match: (RotateAllRightInt32x8 [a] x) + // result: (VPRORD256 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked256) + v.reset(OpAMD64VPRORD256) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightInt64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt64x2 [a] x y mask) - // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + // match: (RotateAllRightInt64x2 [a] x) + // result: (VPRORQ128 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked128) + v.reset(OpAMD64VPRORQ128) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightInt64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt64x4 [a] x y mask) - // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + // match: (RotateAllRightInt64x4 [a] x) + // result: (VPRORQ256 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked256) + v.reset(OpAMD64VPRORQ256) v.AuxInt = int8ToAuxInt(a) 
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightInt64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromInt64x8 [a] x y mask) - // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + // match: (RotateAllRightInt64x8 [a] x) + // result: (VPRORQ512 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512) + v.reset(OpAMD64VPRORQ512) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromUint16x16 [a] x y mask) - // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + // match: (RotateAllRightMaskedInt32x16 [a] x mask) + // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked256) + mask := v_1 + v.reset(OpAMD64VPRORDMasked512) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromUint16x32 [a] x y mask) - // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + // match: (RotateAllRightMaskedInt32x4 [a] x mask) + // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512) + mask := v_1 + v.reset(OpAMD64VPRORDMasked128) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromUint16x8 [a] x y mask) - // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + // match: (RotateAllRightMaskedInt32x8 [a] x mask) + // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked128) + mask := v_1 + v.reset(OpAMD64VPRORDMasked256) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x16(v *Value) bool { - v_2 := v.Args[2] +func 
rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromUint32x16 [a] x y mask) - // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + // match: (RotateAllRightMaskedInt64x2 [a] x mask) + // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512) + mask := v_1 + v.reset(OpAMD64VPRORQMasked128) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromUint32x4 [a] x y mask) - // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + // match: (RotateAllRightMaskedInt64x4 [a] x mask) + // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked128) + mask := v_1 + v.reset(OpAMD64VPRORQMasked256) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromUint32x8 [a] x y mask) - // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + // match: (RotateAllRightMaskedInt64x8 [a] x mask) + // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked256) + mask := v_1 + v.reset(OpAMD64VPRORQMasked512) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromUint64x2 [a] x y mask) - // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + // match: (RotateAllRightMaskedUint32x16 [a] x mask) + // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked128) + mask := v_1 + v.reset(OpAMD64VPRORDMasked512) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: 
(MaskedShiftAllLeftAndFillUpperFromUint64x4 [a] x y mask) - // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + // match: (RotateAllRightMaskedUint32x4 [a] x mask) + // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked256) + mask := v_1 + v.reset(OpAMD64VPRORDMasked128) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftAndFillUpperFromUint64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftAndFillUpperFromUint64x8 [a] x y mask) - // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + // match: (RotateAllRightMaskedUint32x8 [a] x mask) + // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512) + mask := v_1 + v.reset(OpAMD64VPRORDMasked256) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftInt64x2 x y mask) - // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (RotateAllRightMaskedUint64x2 [a] x mask) + // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128) + mask := v_1 + v.reset(OpAMD64VPRORQMasked128) + v.AuxInt = int8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftInt64x4 x y mask) - // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (RotateAllRightMaskedUint64x4 [a] x mask) + // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256) + mask := v_1 + v.reset(OpAMD64VPRORQMasked256) + v.AuxInt = int8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftInt64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllLeftInt64x8 x y mask) - // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (RotateAllRightMaskedUint64x8 [a] x mask) + // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512) + mask := v_1 + v.reset(OpAMD64VPRORQMasked512) + 
v.AuxInt = int8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightUint32x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftUint64x2 x y mask) - // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (RotateAllRightUint32x16 [a] x) + // result: (VPRORD512 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPRORD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightUint32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftUint64x4 x y mask) - // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (RotateAllRightUint32x4 [a] x) + // result: (VPRORD128 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPRORD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllLeftUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightUint32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllLeftUint64x8 x y mask) - // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (RotateAllRightUint32x8 [a] x) + // result: (VPRORD256 [a] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPRORD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightUint64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt16x16 [a] x y mask) - // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + // match: (RotateAllRightUint64x2 [a] x) + // result: (VPRORQ128 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked256) + v.reset(OpAMD64VPRORQ128) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightUint64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt16x32 [a] x y mask) - // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + // match: (RotateAllRightUint64x4 [a] x) + // result: (VPRORQ256 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512) + v.reset(OpAMD64VPRORQ256) v.AuxInt = int8ToAuxInt(a) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRotateAllRightUint64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt16x8 [a] x y mask) - // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + // match: (RotateAllRightUint64x8 [a] x) + // result: (VPRORQ512 [a] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked128) + v.reset(OpAMD64VPRORQ512) v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x16(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt32x16 [a] x y mask) - // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + // match: (RotateLeftMaskedInt32x16 x y mask) + // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512) - v.AuxInt = int8ToAuxInt(a) + v.reset(OpAMD64VPROLVDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x4(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt32x4 [a] x y mask) - // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + // match: (RotateLeftMaskedInt32x4 x y mask) + // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDDMasked128) - v.AuxInt = int8ToAuxInt(a) + v.reset(OpAMD64VPROLVDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt32x8(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt32x8 [a] x y mask) - // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + // match: (RotateLeftMaskedInt32x8 x y mask) + // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDDMasked256) - v.AuxInt = int8ToAuxInt(a) + v.reset(OpAMD64VPROLVDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x2(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt64x2 [a] x y mask) - // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + // match: (RotateLeftMaskedInt64x2 x y mask) + // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := 
auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDQMasked128) - v.AuxInt = int8ToAuxInt(a) + v.reset(OpAMD64VPROLVQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x4(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt64x4 [a] x y mask) - // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + // match: (RotateLeftMaskedInt64x4 x y mask) + // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDQMasked256) - v.AuxInt = int8ToAuxInt(a) + v.reset(OpAMD64VPROLVQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromInt64x8(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromInt64x8 [a] x y mask) - // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + // match: (RotateLeftMaskedInt64x8 x y mask) + // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512) - v.AuxInt = int8ToAuxInt(a) + v.reset(OpAMD64VPROLVQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x16(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint16x16 [a] x y mask) - // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + // match: (RotateLeftMaskedUint32x16 x y mask) + // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDWMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPROLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x32(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint16x32 [a] x y mask) - // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + // match: (RotateLeftMaskedUint32x4 x y mask) + // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v.reset(OpAMD64VPROLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint16x8(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 
:= v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint16x8 [a] x y mask) - // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + // match: (RotateLeftMaskedUint32x8 x y mask) + // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDWMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPROLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x16(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint32x16 [a] x y mask) - // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + // match: (RotateLeftMaskedUint64x2 x y mask) + // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v.reset(OpAMD64VPROLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x4(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint32x4 [a] x y mask) - // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + // match: (RotateLeftMaskedUint64x4 x y mask) + // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDDMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPROLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint32x8(v *Value) bool { +func rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint32x8 [a] x y mask) - // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + // match: (RotateLeftMaskedUint64x8 x y mask) + // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDDMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPROLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x2(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint64x2 [a] x y mask) - // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + // match: (RotateRightMaskedInt32x16 x y mask) + // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) for { - a := 
auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDQMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPRORVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x4(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint64x4 [a] x y mask) - // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + // match: (RotateRightMaskedInt32x4 x y mask) + // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDQMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPRORVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightAndFillUpperFromUint64x8(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightAndFillUpperFromUint64x8 [a] x y mask) - // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + // match: (RotateRightMaskedInt32x8 x y mask) + // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPRORVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightInt64x2(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightInt64x2 x y mask) - // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (RotateRightMaskedInt64x2 x y mask) + // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked128) + v.reset(OpAMD64VPRORVQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightInt64x4(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightInt64x4 x y mask) - // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (RotateRightMaskedInt64x4 x y mask) + // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked256) + v.reset(OpAMD64VPRORVQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightInt64x8(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightInt64x8 x y mask) - // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + // 
match: (RotateRightMaskedInt64x8 x y mask) + // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked512) + v.reset(OpAMD64VPRORVQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x2(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightSignExtendedInt64x2 x y mask) - // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (RotateRightMaskedUint32x16 x y mask) + // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPRORVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x4(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightSignExtendedInt64x4 x y mask) - // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (RotateRightMaskedUint32x4 x y mask) + // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPRORVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightSignExtendedInt64x8(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightSignExtendedInt64x8 x y mask) - // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (RotateRightMaskedUint32x8 x y mask) + // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPRORVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightUint64x2(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightUint64x2 x y mask) - // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (RotateRightMaskedUint64x2 x y mask) + // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked128) + v.reset(OpAMD64VPRORVQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightUint64x4(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightUint64x4 x y mask) - // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (RotateRightMaskedUint64x4 x y mask) + // result: 
(VPRORVQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked256) + v.reset(OpAMD64VPRORVQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftAllRightUint64x8(v *Value) bool { +func rewriteValueAMD64_OpRotateRightMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftAllRightUint64x8 x y mask) - // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (RotateRightMaskedUint64x8 x y mask) + // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked512) + v.reset(OpAMD64VPRORVQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt16x16 x y z mask) - // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) - return true - } -} -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundFloat32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt16x32 x y z mask) - // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + // match: (RoundFloat32x4 x) + // result: (VROUNDPS128 [0] x) for { x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundFloat32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt16x8 x y z mask) - // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + // match: (RoundFloat32x8 x) + // result: (VROUNDPS256 [0] x) for { x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundFloat64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt32x16 x y z mask) - // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (RoundFloat64x2 x) + // result: (VROUNDPD128 [0] x) for { x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) return true 
} } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt32x4 x y z mask) - // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (RoundFloat64x4 x) + // result: (VROUNDPD256 [0] x) for { x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundToEven(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt32x8 x y z mask) - // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (RoundToEven x) + // result: (ROUNDSD [0] x) for { x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64ROUNDSD) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundWithPrecisionFloat32x16(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt64x2 x y z mask) - // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + // match: (RoundWithPrecisionFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundWithPrecisionFloat32x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt64x4 x y z mask) - // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + // match: (RoundWithPrecisionFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromInt64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundWithPrecisionFloat32x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromInt64x8 x y z mask) - // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + // match: (RoundWithPrecisionFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked512) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundWithPrecisionFloat64x2(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint16x16 x y z mask) - // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + // match: (RoundWithPrecisionFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint16x32 x y z mask) - // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + // match: (RoundWithPrecisionFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint16x8 x y z mask) - // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + // match: (RoundWithPrecisionFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+0] x) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = int8ToAuxInt(a + 0) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint32x16 x y z mask) - // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (RoundWithPrecisionMaskedFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked512) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 0) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func 
rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint32x4 x y z mask) - // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (RoundWithPrecisionMaskedFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked128) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 0) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint32x8 x y z mask) - // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (RoundWithPrecisionMaskedFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVDMasked256) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 0) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint64x2 x y z mask) - // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + // match: (RoundWithPrecisionMaskedFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked128) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 0) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint64x4 x y z mask) - // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + // match: (RoundWithPrecisionMaskedFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked256) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 0) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftAndFillUpperFromUint64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRoundWithPrecisionMaskedFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftAndFillUpperFromUint64x8 x y z mask) - 
// result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + // match: (RoundWithPrecisionMaskedFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHLDVQMasked512) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 0) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftLeftInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftInt16x16 x y mask) - // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (Rsh16Ux16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPWconst y [16]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(16) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftInt16x32 x y mask) - // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (Rsh16Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SHRW x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRW) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftLeftInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh16Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftInt16x8 x y mask) - // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (Rsh16Ux32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPLconst y [16]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(16) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftInt32x16 x y mask) - // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (Rsh16Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SHRW x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, 
v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRW) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftLeftInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftInt32x4 x y mask) - // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (Rsh16Ux64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPQconst y [16]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(16) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftInt32x8 x y mask) - // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (Rsh16Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SHRW x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRW) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftLeftInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftInt64x2 x y mask) - // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (Rsh16Ux8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPBconst y [16]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(16) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftInt64x4 x y mask) - // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (Rsh16Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SHRW x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRW) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftLeftInt64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftInt64x8 x y mask) - // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (Rsh16x16 x 
y) + // cond: !shiftIsBounded(v) + // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [16]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v3.AuxInt = int16ToAuxInt(16) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftUint16x16 x y mask) - // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (Rsh16x16 x y) + // cond: shiftIsBounded(v) + // result: (SARW x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftLeftUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftUint16x32 x y mask) - // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (Rsh16x32 x y) + // cond: !shiftIsBounded(v) + // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [16]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(16) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftUint16x8 x y mask) - // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (Rsh16x32 x y) + // cond: shiftIsBounded(v) + // result: (SARW x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftLeftUint32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh16x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftUint32x16 x y mask) - // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (Rsh16x64 x y) + // cond: !shiftIsBounded(v) + // result: (SARW x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [16]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(16) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftUint32x4 x y mask) - // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (Rsh16x64 x y) + // cond: shiftIsBounded(v) + // result: (SARW x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftLeftUint32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftUint32x8 x y mask) - // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (Rsh16x8 x y) + // cond: !shiftIsBounded(v) + // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [16]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v3.AuxInt = int8ToAuxInt(16) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftUint64x2 x y mask) - // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (Rsh16x8 x y) + // cond: shiftIsBounded(v) + // result: (SARW x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARW) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftLeftUint64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh32Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftLeftUint64x4 x y mask) - // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (Rsh32Ux16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPWconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, 
OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftLeftUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftLeftUint64x8 x y mask) - // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (Rsh32Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SHRL x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh32Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt16x16 x y z mask) - // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + // match: (Rsh32Ux32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPLconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt16x32 x y z mask) - // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + // match: (Rsh32Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SHRL x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt16x8 x y z mask) - // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + // match: (Rsh32Ux64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPQconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func 
rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt32x16 x y z mask) - // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (Rsh32Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SHRL x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt32x4 x y z mask) - // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (Rsh32Ux8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPBconst y [32]))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(32) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt32x8 x y z mask) - // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (Rsh32Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SHRL x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt64x2 x y z mask) - // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + // match: (Rsh32x16 x y) + // cond: !shiftIsBounded(v) + // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [32]))))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v3.AuxInt = int16ToAuxInt(32) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func 
rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt64x4 x y z mask) - // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + // match: (Rsh32x16 x y) + // cond: shiftIsBounded(v) + // result: (SARL x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromInt64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh32x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromInt64x8 x y z mask) - // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + // match: (Rsh32x32 x y) + // cond: !shiftIsBounded(v) + // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [32]))))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(32) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint16x16 x y z mask) - // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) + // match: (Rsh32x32 x y) + // cond: shiftIsBounded(v) + // result: (SARL x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh32x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint16x32 x y z mask) - // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) + // match: (Rsh32x64 x y) + // cond: !shiftIsBounded(v) + // result: (SARL x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [32]))))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(32) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + 
v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint16x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint16x8 x y z mask) - // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) + // match: (Rsh32x64 x y) + // cond: shiftIsBounded(v) + // result: (SARL x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint32x16 x y z mask) - // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (Rsh32x8 x y) + // cond: !shiftIsBounded(v) + // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [32]))))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v3.AuxInt = int8ToAuxInt(32) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint32x4 x y z mask) - // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (Rsh32x8 x y) + // cond: shiftIsBounded(v) + // result: (SARL x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARL) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh64Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint32x8 x y z mask) - // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (Rsh64Ux16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPWconst y [64]))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) 
return true } -} -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint64x2 x y z mask) - // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + // match: (Rsh64Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SHRQ x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh64Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint64x4 x y z mask) - // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) + // match: (Rsh64Ux32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPLconst y [64]))) for { + t := v.Type x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightAndFillUpperFromUint64x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightAndFillUpperFromUint64x8 x y z mask) - // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) + // match: (Rsh64Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SHRQ x y) for { x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPSHRDVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightInt16x16 x y mask) - // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (Rsh64Ux64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPQconst y [64]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightInt16x32 
x y mask) - // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (Rsh64Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SHRQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightInt16x8 x y mask) - // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (Rsh64Ux8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPBconst y [64]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDQ) + v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(64) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightInt32x16 x y mask) - // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (Rsh64Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SHRQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh64x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightInt32x4 x y mask) - // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (Rsh64x16 x y) + // cond: !shiftIsBounded(v) + // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [64]))))) + for { + t := v.Type + x := v_0 + y := v_1 + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v3.AuxInt = int16ToAuxInt(64) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) + return true + } + // match: (Rsh64x16 x y) + // cond: shiftIsBounded(v) + // result: (SARQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightInt32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh64x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightInt32x8 x y mask) - // result: (VPSRLVDMasked256 x y 
(VPMOVVec32x8ToM mask)) + // match: (Rsh64x32 x y) + // cond: !shiftIsBounded(v) + // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [64]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(64) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightInt64x2 x y mask) - // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (Rsh64x32 x y) + // cond: shiftIsBounded(v) + // result: (SARQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightInt64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh64x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightInt64x4 x y mask) - // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (Rsh64x64 x y) + // cond: !shiftIsBounded(v) + // result: (SARQ x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [64]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(64) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightInt64x8 x y mask) - // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (Rsh64x64 x y) + // cond: shiftIsBounded(v) + // result: (SARQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedInt16x16 x y mask) - // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (Rsh64x8 x y) + // cond: !shiftIsBounded(v) + // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [64]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - 
v.reset(OpAMD64VPSRAVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v3.AuxInt = int8ToAuxInt(64) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedInt16x32 x y mask) - // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (Rsh64x8 x y) + // cond: shiftIsBounded(v) + // result: (SARQ x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARQ) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh8Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedInt16x8 x y mask) - // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (Rsh8Ux16 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPWconst y [8]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v2.AuxInt = int16ToAuxInt(8) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedInt32x16 x y mask) - // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (Rsh8Ux16 x y) + // cond: shiftIsBounded(v) + // result: (SHRB x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRB) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh8Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedInt32x4 x y mask) - // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (Rsh8Ux32 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPLconst y [8]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, 
OpAMD64SHRB, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(8) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedInt32x8 x y mask) - // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (Rsh8Ux32 x y) + // cond: shiftIsBounded(v) + // result: (SHRB x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRB) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedInt64x2 x y mask) - // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (Rsh8Ux64 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPQconst y [8]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(8) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedInt64x4 x y mask) - // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (Rsh8Ux64 x y) + // cond: shiftIsBounded(v) + // result: (SHRB x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRB) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedInt64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedInt64x8 x y mask) - // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (Rsh8Ux8 x y) + // cond: !shiftIsBounded(v) + // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPBconst y [8]))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64ANDL) + v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) + v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v2.AuxInt = int8ToAuxInt(8) + v2.AddArg(y) + v1.AddArg(v2) + v.AddArg2(v0, v1) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x16(v *Value) bool { - v_2 
:= v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedUint16x16 x y mask) - // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (Rsh8Ux8 x y) + // cond: shiftIsBounded(v) + // result: (SHRB x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SHRB) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedUint16x32 x y mask) - // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (Rsh8x16 x y) + // cond: !shiftIsBounded(v) + // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [8]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) + v3.AuxInt = int16ToAuxInt(8) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedUint16x8 x y mask) - // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (Rsh8x16 x y) + // cond: shiftIsBounded(v) + // result: (SARB x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedUint32x16 x y mask) - // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (Rsh8x32 x y) + // cond: !shiftIsBounded(v) + // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [8]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(8) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedUint32x4 x y mask) - // 
result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (Rsh8x32 x y) + // cond: shiftIsBounded(v) + // result: (SARB x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedUint32x8 x y mask) - // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (Rsh8x64 x y) + // cond: !shiftIsBounded(v) + // result: (SARB x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [8]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(8) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedUint64x2 x y mask) - // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (Rsh8x64 x y) + // cond: shiftIsBounded(v) + // result: (SARB x y) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpRsh8x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightSignExtendedUint64x4 x y mask) - // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (Rsh8x8 x y) + // cond: !shiftIsBounded(v) + // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [8]))))) for { + t := v.Type x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if !(!shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.Type = t + v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) + v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) + v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) + v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) + v3.AuxInt = int8ToAuxInt(8) + v3.AddArg(y) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(y, v1) + v.AddArg2(x, v0) return true } -} -func rewriteValueAMD64_OpMaskedShiftRightSignExtendedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedShiftRightSignExtendedUint64x8 x y mask) - // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (Rsh8x8 x y) + // cond: shiftIsBounded(v) + // result: (SARB x y) for { - x := v_0 
- y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + x := v_0 + y := v_1 + if !(shiftIsBounded(v)) { + break + } + v.reset(OpAMD64SARB) + v.AddArg2(x, y) return true } + return false } -func rewriteValueAMD64_OpMaskedShiftRightUint16x16(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint16x16 x y mask) - // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (SaturatedAddMaskedInt16x16 x y mask) + // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked256) + v.reset(OpAMD64VPADDSWMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftRightUint16x32(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint16x32 x y mask) - // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (SaturatedAddMaskedInt16x32 x y mask) + // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked512) + v.reset(OpAMD64VPADDSWMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftRightUint16x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint16x8 x y mask) - // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (SaturatedAddMaskedInt16x8 x y mask) + // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked128) + v.reset(OpAMD64VPADDSWMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftRightUint32x16(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedInt8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint32x16 x y mask) - // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (SaturatedAddMaskedInt8x16 x y mask) + // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v.reset(OpAMD64VPADDSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftRightUint32x4(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedInt8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint32x4 x y mask) - // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (SaturatedAddMaskedInt8x32 x y mask) + // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPADDSBMasked256) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftRightUint32x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedInt8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint32x8 x y mask) - // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (SaturatedAddMaskedInt8x64 x y mask) + // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPADDSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftRightUint64x2(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint64x2 x y mask) - // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (SaturatedAddMaskedUint16x16 x y mask) + // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPADDSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftRightUint64x4(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint64x4 x y mask) - // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (SaturatedAddMaskedUint16x32 x y mask) + // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPADDSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedShiftRightUint64x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedShiftRightUint64x8 x y mask) - // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (SaturatedAddMaskedUint16x8 x y mask) + // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPADDSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSqrtFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedUint8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSqrtFloat32x16 x mask) - // result: (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) + // match: (SaturatedAddMaskedUint8x16 x y mask) + // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + y := v_1 + mask := v_2 + 
v.reset(OpAMD64VPADDSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSqrtFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedUint8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSqrtFloat32x4 x mask) - // result: (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) + // match: (SaturatedAddMaskedUint8x32 x y mask) + // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDSBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSqrtFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedAddMaskedUint8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSqrtFloat32x8 x mask) - // result: (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) + // match: (SaturatedAddMaskedUint8x64 x y mask) + // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPADDSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSqrtFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpSaturatedPairDotProdAccumulateMaskedInt32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSqrtFloat64x2 x mask) - // result: (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) + // match: (SaturatedPairDotProdAccumulateMaskedInt32x16 x y z mask) + // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPWSSDSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSqrtFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpSaturatedPairDotProdAccumulateMaskedInt32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSqrtFloat64x4 x mask) - // result: (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) + // match: (SaturatedPairDotProdAccumulateMaskedInt32x4 x y z mask) + // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPWSSDSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSqrtFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedPairDotProdAccumulateMaskedInt32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSqrtFloat64x8 x mask) - 
// result: (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) + // match: (SaturatedPairDotProdAccumulateMaskedInt32x8 x y z mask) + // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPWSSDSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSubFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubFloat32x16 x y mask) - // result: (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (SaturatedSubMaskedInt16x16 x y mask) + // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSUBPSMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubFloat32x4 x y mask) - // result: (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (SaturatedSubMaskedInt16x32 x y mask) + // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSUBPSMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubFloat32x8 x y mask) - // result: (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (SaturatedSubMaskedInt16x8 x y mask) + // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSUBPSMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedInt8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubFloat64x2 x y mask) - // result: (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (SaturatedSubMaskedInt8x16 x y mask) + // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSUBPDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedInt8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubFloat64x4 x y 
mask) - // result: (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (SaturatedSubMaskedInt8x32 x y mask) + // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSUBPDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedInt8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubFloat64x8 x y mask) - // result: (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (SaturatedSubMaskedInt8x64 x y mask) + // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VSUBPDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt16x16(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt16x16 x y mask) - // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (SaturatedSubMaskedUint16x16 x y mask) + // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSUBWMasked256) + v.reset(OpAMD64VPSUBSWMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt16x32(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt16x32 x y mask) - // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (SaturatedSubMaskedUint16x32 x y mask) + // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSUBWMasked512) + v.reset(OpAMD64VPSUBSWMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt16x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt16x8 x y mask) - // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (SaturatedSubMaskedUint16x8 x y mask) + // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSUBWMasked128) + v.reset(OpAMD64VPSUBSWMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt32x16(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt32x16 x y mask) - // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (SaturatedSubMaskedUint8x16 x y mask) + // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSUBDMasked512) - v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt32x4(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt32x4 x y mask) - // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (SaturatedSubMaskedUint8x32 x y mask) + // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSUBDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt32x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedSubMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt32x8 x y mask) - // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (SaturatedSubMaskedUint8x64 x y mask) + // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSUBDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt64x2(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt64x2 x y mask) - // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x16 x y mask) + // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSUBQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPMADDUBSWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt64x4(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt64x4 x y mask) - // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x32 x y mask) + // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSUBQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPMADDUBSWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt64x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt64x8 x y mask) - // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (SaturatedUnsignedSignedPairDotProdMaskedUint8x64 x y mask) + // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - 
v.reset(OpAMD64VPSUBQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPMADDUBSWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt8x16(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt8x16 x y mask) - // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) + // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt8x32(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt8x32 x y mask) - // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) + // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) + // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSubInt8x64(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubInt8x64 x y mask) - // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) + // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSubUint16x16(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubUint16x16 x y mask) - // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask) + // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) - 
v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSubUint16x32(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubUint16x32 x y mask) - // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask) + // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSubUint16x8(v *Value) bool { +func rewriteValueAMD64_OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MaskedSubUint16x8 x y mask) - // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask) + // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpMaskedSubUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpSelect0(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (MaskedSubUint32x16 x y mask) - // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) + typ := &b.Func.Config.Types + // match: (Select0 (Mul64uover x y)) + // result: (Select0 (MULQU x y)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpMul64uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpSelect0) + v.Type = typ.UInt64 + v0 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags)) + v0.AddArg2(x, y) + v.AddArg(v0) return true } -} -func rewriteValueAMD64_OpMaskedSubUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedSubUint32x4 x y mask) - // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (Select0 (Mul32uover x y)) + // result: (Select0 (MULLU x y)) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpMul32uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpSelect0) + v.Type = typ.UInt32 + v0 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags)) + v0.AddArg2(x, y) + v.AddArg(v0) return true } -} -func rewriteValueAMD64_OpMaskedSubUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedSubUint32x8 x y mask) - // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) 
+ // match: (Select0 (Add64carry x y c)) + // result: (Select0 (ADCQ x y (Select1 (NEGLflags c)))) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpAdd64carry { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpSelect0) + v.Type = typ.UInt64 + v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) + v2.AddArg(c) + v1.AddArg(v2) + v0.AddArg3(x, y, v1) + v.AddArg(v0) + return true + } + // match: (Select0 (Sub64borrow x y c)) + // result: (Select0 (SBBQ x y (Select1 (NEGLflags c)))) + for { + if v_0.Op != OpSub64borrow { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpSelect0) + v.Type = typ.UInt64 + v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) + v2.AddArg(c) + v1.AddArg(v2) + v0.AddArg3(x, y, v1) + v.AddArg(v0) + return true + } + // match: (Select0 (AddTupleFirst32 val tuple)) + // result: (ADDL val (Select0 tuple)) + for { + t := v.Type + if v_0.Op != OpAMD64AddTupleFirst32 { + break + } + tuple := v_0.Args[1] + val := v_0.Args[0] + v.reset(OpAMD64ADDL) + v0 := b.NewValue0(v.Pos, OpSelect0, t) + v0.AddArg(tuple) + v.AddArg2(val, v0) + return true + } + // match: (Select0 (AddTupleFirst64 val tuple)) + // result: (ADDQ val (Select0 tuple)) + for { + t := v.Type + if v_0.Op != OpAMD64AddTupleFirst64 { + break + } + tuple := v_0.Args[1] + val := v_0.Args[0] + v.reset(OpAMD64ADDQ) + v0 := b.NewValue0(v.Pos, OpSelect0, t) + v0.AddArg(tuple) + v.AddArg2(val, v0) + return true + } + // match: (Select0 a:(ADDQconstflags [c] x)) + // cond: a.Uses == 1 + // result: (ADDQconst [c] x) + for { + a := v_0 + if a.Op != OpAMD64ADDQconstflags { + break + } + c := auxIntToInt32(a.AuxInt) + x := a.Args[0] + if !(a.Uses == 1) { + break + } + v.reset(OpAMD64ADDQconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) + return true + } + // match: (Select0 a:(ADDLconstflags [c] x)) + // cond: a.Uses == 1 + // result: (ADDLconst [c] x) + for { + a := v_0 + if a.Op != OpAMD64ADDLconstflags { + break + } + c := auxIntToInt32(a.AuxInt) + x := a.Args[0] + if !(a.Uses == 1) { + break + } + v.reset(OpAMD64ADDLconst) + v.AuxInt = int32ToAuxInt(c) + v.AddArg(x) return true } + return false } -func rewriteValueAMD64_OpMaskedSubUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpSelect1(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (MaskedSubUint64x2 x y mask) - // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) + typ := &b.Func.Config.Types + // match: (Select1 (Mul64uover x y)) + // result: (SETO (Select1 (MULQU x y))) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpMul64uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpAMD64SETO) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v1 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags)) + v1.AddArg2(x, y) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // 
match: (Select1 (Mul32uover x y)) + // result: (SETO (Select1 (MULLU x y))) + for { + if v_0.Op != OpMul32uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpAMD64SETO) + v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v1 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags)) + v1.AddArg2(x, y) + v0.AddArg(v1) + v.AddArg(v0) + return true + } + // match: (Select1 (Add64carry x y c)) + // result: (NEGQ (SBBQcarrymask (Select1 (ADCQ x y (Select1 (NEGLflags c)))))) + for { + if v_0.Op != OpAdd64carry { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpAMD64NEGQ) + v.Type = typ.UInt64 + v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64) + v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) + v4.AddArg(c) + v3.AddArg(v4) + v2.AddArg3(x, y, v3) + v1.AddArg(v2) + v0.AddArg(v1) + v.AddArg(v0) return true } -} -func rewriteValueAMD64_OpMaskedSubUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedSubUint64x4 x y mask) - // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (Select1 (Sub64borrow x y c)) + // result: (NEGQ (SBBQcarrymask (Select1 (SBBQ x y (Select1 (NEGLflags c)))))) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpSub64borrow { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpAMD64NEGQ) + v.Type = typ.UInt64 + v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64) + v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags)) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) + v4.AddArg(c) + v3.AddArg(v4) + v2.AddArg3(x, y, v3) + v1.AddArg(v2) + v0.AddArg(v1) + v.AddArg(v0) return true } -} -func rewriteValueAMD64_OpMaskedSubUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedSubUint64x8 x y mask) - // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (Select1 (NEGLflags (MOVQconst [0]))) + // result: (FlagEQ) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpAMD64NEGLflags { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 0 { + break + } + v.reset(OpAMD64FlagEQ) return true } -} -func rewriteValueAMD64_OpMaskedSubUint8x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedSubUint8x16 x y mask) - // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) + // match: (Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) + // result: x for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpAMD64NEGLflags { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpAMD64NEGQ { + 
break + } + v_0_0_0 := v_0_0.Args[0] + if v_0_0_0.Op != OpAMD64SBBQcarrymask { + break + } + x := v_0_0_0.Args[0] + v.copyOf(x) return true } -} -func rewriteValueAMD64_OpMaskedSubUint8x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedSubUint8x32 x y mask) - // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) + // match: (Select1 (AddTupleFirst32 _ tuple)) + // result: (Select1 tuple) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpAMD64AddTupleFirst32 { + break + } + tuple := v_0.Args[1] + v.reset(OpSelect1) + v.AddArg(tuple) return true } -} -func rewriteValueAMD64_OpMaskedSubUint8x64(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedSubUint8x64 x y mask) - // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) + // match: (Select1 (AddTupleFirst64 _ tuple)) + // result: (Select1 tuple) for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSUBBMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + if v_0.Op != OpAMD64AddTupleFirst64 { + break + } + tuple := v_0.Args[1] + v.reset(OpSelect1) + v.AddArg(tuple) return true } -} -func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedTruncWithPrecisionFloat32x16 [a] x mask) - // result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) + // match: (Select1 a:(LoweredAtomicAnd64 ptr val mem)) + // cond: a.Uses == 1 && clobber(a) + // result: (ANDQlock ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + a := v_0 + if a.Op != OpAMD64LoweredAtomicAnd64 { + break + } + mem := a.Args[2] + ptr := a.Args[0] + val := a.Args[1] + if !(a.Uses == 1 && clobber(a)) { + break + } + v.reset(OpAMD64ANDQlock) + v.AddArg3(ptr, val, mem) return true } -} -func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedTruncWithPrecisionFloat32x4 [a] x mask) - // result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) + // match: (Select1 a:(LoweredAtomicAnd32 ptr val mem)) + // cond: a.Uses == 1 && clobber(a) + // result: (ANDLlock ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked128) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + a := v_0 + if a.Op != OpAMD64LoweredAtomicAnd32 { + break + } + mem := a.Args[2] + ptr := a.Args[0] + val := a.Args[1] + if !(a.Uses == 1 && clobber(a)) { + break + } + v.reset(OpAMD64ANDLlock) + v.AddArg3(ptr, val, mem) return true } -} -func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedTruncWithPrecisionFloat32x8 [a] x mask) - // result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) + // match: (Select1 a:(LoweredAtomicOr64 ptr val mem)) + // cond: a.Uses == 1 && clobber(a) + // result: (ORQlock ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - 
mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked256) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + a := v_0 + if a.Op != OpAMD64LoweredAtomicOr64 { + break + } + mem := a.Args[2] + ptr := a.Args[0] + val := a.Args[1] + if !(a.Uses == 1 && clobber(a)) { + break + } + v.reset(OpAMD64ORQlock) + v.AddArg3(ptr, val, mem) return true } -} -func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedTruncWithPrecisionFloat64x2 [a] x mask) - // result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) + // match: (Select1 a:(LoweredAtomicOr32 ptr val mem)) + // cond: a.Uses == 1 && clobber(a) + // result: (ORLlock ptr val mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked128) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + a := v_0 + if a.Op != OpAMD64LoweredAtomicOr32 { + break + } + mem := a.Args[2] + ptr := a.Args[0] + val := a.Args[1] + if !(a.Uses == 1 && clobber(a)) { + break + } + v.reset(OpAMD64ORLlock) + v.AddArg3(ptr, val, mem) return true } + return false } -func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpSelectN(v *Value) bool { v_0 := v.Args[0] b := v.Block - // match: (MaskedTruncWithPrecisionFloat64x4 [a] x mask) - // result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) + config := b.Func.Config + // match: (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem))))) + // cond: sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call) + // result: (Move [sc.Val64()] dst src mem) for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked256) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if auxIntToInt64(v.AuxInt) != 0 { + break + } + call := v_0 + if call.Op != OpAMD64CALLstatic || len(call.Args) != 1 { + break + } + sym := auxToCall(call.Aux) + s1 := call.Args[0] + if s1.Op != OpAMD64MOVQstoreconst { + break + } + sc := auxIntToValAndOff(s1.AuxInt) + _ = s1.Args[1] + s2 := s1.Args[1] + if s2.Op != OpAMD64MOVQstore { + break + } + _ = s2.Args[2] + src := s2.Args[1] + s3 := s2.Args[2] + if s3.Op != OpAMD64MOVQstore { + break + } + mem := s3.Args[2] + dst := s3.Args[1] + if !(sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)) { + break + } + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(sc.Val64()) + v.AddArg3(dst, src, mem) return true } -} -func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MaskedTruncWithPrecisionFloat64x8 [a] x mask) - // result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) + // match: (SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem)) + // cond: sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call) + // result: (Move [sz] dst src mem) for { - a := 
auxIntToInt8(v.AuxInt) - x := v_0 - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = int8ToAuxInt(a + 3) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) + if auxIntToInt64(v.AuxInt) != 0 { + break + } + call := v_0 + if call.Op != OpAMD64CALLstatic || len(call.Args) != 4 { + break + } + sym := auxToCall(call.Aux) + mem := call.Args[3] + dst := call.Args[0] + src := call.Args[1] + call_2 := call.Args[2] + if call_2.Op != OpAMD64MOVQconst { + break + } + sz := auxIntToInt64(call_2.AuxInt) + if !(sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)) { + break + } + v.reset(OpMove) + v.AuxInt = int64ToAuxInt(sz) + v.AddArg3(dst, src, mem) return true } + return false } -func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Float32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x16 x y z mask) - // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (Set128Float32x8 [a] x y) + // result: (VINSERTF128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Float64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x4 x y z mask) - // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (Set128Float64x4 [a] x y) + // result: (VINSERTF128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Int16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedUnsignedSignedQuadDotProdAccumulateInt32x8 x y z mask) - // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (Set128Int16x16 [a] x y) + // result: (VINSERTI128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Int32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x16 x y z mask) - // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) + // match: (Set128Int32x8 [a] x y) + // 
result: (VINSERTI128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Int64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x4 x y z mask) - // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) + // match: (Set128Int64x4 [a] x y) + // result: (VINSERTI128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Int8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedUnsignedSignedQuadDotProdAccumulateUint32x8 x y z mask) - // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) + // match: (Set128Int8x32 [a] x y) + // result: (VINSERTI128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - z := v_2 - mask := v_3 - v.reset(OpAMD64VPDPBUSDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg4(x, y, z, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Uint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorInt32x16 x y mask) - // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (Set128Uint16x16 [a] x y) + // result: (VINSERTI128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Uint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorInt32x4 x y mask) - // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (Set128Uint32x8 [a] x y) + // result: (VINSERTI128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorInt32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Uint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorInt32x8 x y mask) - // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (Set128Uint64x4 [a] x y) + // 
result: (VINSERTI128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSet128Uint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorInt64x2 x y mask) - // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (Set128Uint8x32 [a] x y) + // result: (VINSERTI128256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorInt64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetElemInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorInt64x4 x y mask) - // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (SetElemInt16x8 [a] x y) + // result: (VPINSRW128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPINSRW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorInt64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetElemInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorInt64x8 x y mask) - // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (SetElemInt32x4 [a] x y) + // result: (VPINSRD128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPINSRD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorUint32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetElemInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorUint32x16 x y mask) - // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (SetElemInt64x2 [a] x y) + // result: (VPINSRQ128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPINSRQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorUint32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetElemInt8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorUint32x4 x y mask) - // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (SetElemInt8x16 [a] x y) + // result: (VPINSRB128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + 
v.reset(OpAMD64VPINSRB128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorUint32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetElemUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorUint32x8 x y mask) - // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (SetElemUint16x8 [a] x y) + // result: (VPINSRW128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPINSRW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorUint64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetElemUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorUint64x2 x y mask) - // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (SetElemUint32x4 [a] x y) + // result: (VPINSRD128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPINSRD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorUint64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetElemUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorUint64x4 x y mask) - // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (SetElemUint64x2 [a] x y) + // result: (VPINSRQ128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPINSRQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMaskedXorUint64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetElemUint8x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (MaskedXorUint64x8 x y mask) - // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (SetElemUint8x16 [a] x y) + // result: (VPINSRB128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPXORQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPINSRB128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMax32F(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Max32F x y) - // result: (Neg32F (Min32F (Neg32F x) (Neg32F y))) + // match: (ShiftAllLeftAndFillUpperFromInt16x16 [a] x y) + // result: (VPSHLDW256 [a] x y) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpNeg32F) - v.Type = t - v0 := b.NewValue0(v.Pos, OpMin32F, t) - v1 := b.NewValue0(v.Pos, OpNeg32F, t) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpNeg32F, t) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMax64F(v *Value) bool { +func 
rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Max64F x y) - // result: (Neg64F (Min64F (Neg64F x) (Neg64F y))) + // match: (ShiftAllLeftAndFillUpperFromInt16x32 [a] x y) + // result: (VPSHLDW512 [a] x y) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpNeg64F) - v.Type = t - v0 := b.NewValue0(v.Pos, OpMin64F, t) - v1 := b.NewValue0(v.Pos, OpNeg64F, t) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpNeg64F, t) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMin32F(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Min32F x y) - // result: (POR (MINSS (MINSS x y) x) (MINSS x y)) + // match: (ShiftAllLeftAndFillUpperFromInt16x8 [a] x y) + // result: (VPSHLDW128 [a] x y) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64POR) - v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t) - v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t) - v1.AddArg2(x, y) - v0.AddArg2(v1, x) - v.AddArg2(v0, v1) + v.reset(OpAMD64VPSHLDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMin64F(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Min64F x y) - // result: (POR (MINSD (MINSD x y) x) (MINSD x y)) + // match: (ShiftAllLeftAndFillUpperFromInt32x16 [a] x y) + // result: (VPSHLDD512 [a] x y) for { - t := v.Type + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64POR) - v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t) - v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t) - v1.AddArg2(x, y) - v0.AddArg2(v1, x) - v.AddArg2(v0, v1) + v.reset(OpAMD64VPSHLDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMod16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod16 [a] x y) - // result: (Select1 (DIVW [a] x y)) + // match: (ShiftAllLeftAndFillUpperFromInt32x4 [a] x y) + // result: (VPSHLDD128 [a] x y) for { - a := auxIntToBool(v.AuxInt) + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMod16u(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod16u x y) - // result: (Select1 (DIVWU x y)) + // match: (ShiftAllLeftAndFillUpperFromInt32x8 [a] x y) + // result: (VPSHLDD256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMod32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - 
typ := &b.Func.Config.Types - // match: (Mod32 [a] x y) - // result: (Select1 (DIVL [a] x y)) + // match: (ShiftAllLeftAndFillUpperFromInt64x2 [a] x y) + // result: (VPSHLDQ128 [a] x y) for { - a := auxIntToBool(v.AuxInt) + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMod32u(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod32u x y) - // result: (Select1 (DIVLU x y)) + // match: (ShiftAllLeftAndFillUpperFromInt64x4 [a] x y) + // result: (VPSHLDQ256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32)) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMod64(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Mod64 [a] x y) - // result: (Select1 (DIVQ [a] x y)) + // match: (ShiftAllLeftAndFillUpperFromInt64x8 [a] x y) + // result: (VPSHLDQ512 [a] x y) for { - a := auxIntToBool(v.AuxInt) + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64)) - v0.AuxInt = boolToAuxInt(a) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpMod64u(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Mod64u x y) - // result: (Select1 (DIVQU x y)) + // match: (ShiftAllLeftAndFillUpperFromMaskedInt16x16 [a] x y mask) + // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64)) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMod8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Mod8 x y) - // result: (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y))) + // match: (ShiftAllLeftAndFillUpperFromMaskedInt16x32 [a] x y mask) + // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16)) - v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMod8u(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Mod8u x y) - // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))) + // match: (ShiftAllLeftAndFillUpperFromMaskedInt16x8 [a] x y mask) + // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpSelect1) - v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16)) - v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpMove(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Move [0] _ _ mem) - // result: mem - for { - if auxIntToInt64(v.AuxInt) != 0 { - break - } - mem := v_2 - v.copyOf(mem) - return true - } - // match: (Move [1] dst src mem) - // result: (MOVBstore dst (MOVBload src mem) mem) - for { - if auxIntToInt64(v.AuxInt) != 1 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVBstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) - return true - } - // match: (Move [2] dst src mem) - // result: (MOVWstore dst (MOVWload src mem) mem) - for { - if auxIntToInt64(v.AuxInt) != 2 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVWstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) - return true - } - // match: (Move [4] dst src mem) - // result: (MOVLstore dst (MOVLload src mem) mem) - for { - if auxIntToInt64(v.AuxInt) != 4 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVLstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) - return true - } - // match: (Move [8] dst src mem) - // result: (MOVQstore dst (MOVQload src mem) mem) - for { - if auxIntToInt64(v.AuxInt) != 8 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVQstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) - return true - } - // match: (Move [16] dst src mem) - // result: (MOVOstore dst (MOVOload src mem) mem) - for { - if auxIntToInt64(v.AuxInt) != 16 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVOstore) - v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128) - v0.AddArg2(src, mem) - v.AddArg3(dst, v0, mem) - return true - } - // match: (Move [32] dst src mem) - // result: (Move [16] (OffPtr dst [16]) (OffPtr src [16]) (Move [16] dst src mem)) - for { - if auxIntToInt64(v.AuxInt) != 32 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(16) - v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) - v0.AuxInt = int64ToAuxInt(16) - v0.AddArg(dst) - v1 := b.NewValue0(v.Pos, OpOffPtr, 
src.Type) - v1.AuxInt = int64ToAuxInt(16) - v1.AddArg(src) - v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem) - v2.AuxInt = int64ToAuxInt(16) - v2.AddArg3(dst, src, mem) - v.AddArg3(v0, v1, v2) - return true - } - // match: (Move [48] dst src mem) - // result: (Move [32] (OffPtr dst [16]) (OffPtr src [16]) (Move [16] dst src mem)) - for { - if auxIntToInt64(v.AuxInt) != 48 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(32) - v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) - v0.AuxInt = int64ToAuxInt(16) - v0.AddArg(dst) - v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) - v1.AuxInt = int64ToAuxInt(16) - v1.AddArg(src) - v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem) - v2.AuxInt = int64ToAuxInt(16) - v2.AddArg3(dst, src, mem) - v.AddArg3(v0, v1, v2) - return true - } - // match: (Move [64] dst src mem) - // result: (Move [32] (OffPtr dst [32]) (OffPtr src [32]) (Move [32] dst src mem)) + // match: (ShiftAllLeftAndFillUpperFromMaskedInt32x16 [a] x y mask) + // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 64 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(32) - v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) - v0.AuxInt = int64ToAuxInt(32) - v0.AddArg(dst) - v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) - v1.AuxInt = int64ToAuxInt(32) - v1.AddArg(src) - v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem) - v2.AuxInt = int64ToAuxInt(32) - v2.AddArg3(dst, src, mem) - v.AddArg3(v0, v1, v2) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [3] dst src mem) - // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem)) - for { - if auxIntToInt64(v.AuxInt) != 3 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(2) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) - v0.AuxInt = int32ToAuxInt(2) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVWstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedInt32x4 [a] x y mask) + // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [5] dst src mem) - // result: (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedInt32x8 [a] x y mask) + // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 5 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(4) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, 
typ.UInt8) - v0.AuxInt = int32ToAuxInt(4) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [6] dst src mem) - // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedInt64x2 [a] x y mask) + // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 6 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVWstore) - v.AuxInt = int32ToAuxInt(4) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) - v0.AuxInt = int32ToAuxInt(4) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [7] dst src mem) - // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedInt64x4 [a] x y mask) + // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 7 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(3) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = int32ToAuxInt(3) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [9] dst src mem) - // result: (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedInt64x8 [a] x y mask) + // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 9 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVBstore) - v.AuxInt = int32ToAuxInt(8) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8) - v0.AuxInt = int32ToAuxInt(8) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) 
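// The ...Masked variants above follow the same pattern with one extra step: the
// generic mask argument (a vector-shaped mask) is first converted to an AVX-512
// mask-register value via the width-matched VPMOVVec<elem>x<lanes>ToM op, and that
// result becomes the final argument of the masked instruction. A hedged sketch
// (the helper name is hypothetical; the calls are the ones the generated rules use):
func lowerShiftConcatImmMasked(v *Value, machineOp, maskCvtOp Op) bool {
	b := v.Block
	x := v.Args[0]
	y := v.Args[1]
	mask := v.Args[2]
	a := auxIntToInt8(v.AuxInt)
	v.reset(machineOp) // e.g. OpAMD64VPSHLDWMasked256
	v.AuxInt = int8ToAuxInt(a)
	m := b.NewValue0(v.Pos, maskCvtOp, types.TypeMask) // e.g. OpAMD64VPMOVVec16x16ToM
	m.AddArg(mask)
	v.AddArg3(x, y, m)
	return true
}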
- v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [10] dst src mem) - // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedUint16x16 [a] x y mask) + // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 10 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVWstore) - v.AuxInt = int32ToAuxInt(8) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16) - v0.AuxInt = int32ToAuxInt(8) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [11] dst src mem) - // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedUint16x32 [a] x y mask) + // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 11 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(7) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = int32ToAuxInt(7) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [12] dst src mem) - // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedUint16x8 [a] x y mask) + // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { - if auxIntToInt64(v.AuxInt) != 12 { - break - } - dst := v_0 - src := v_1 - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = int32ToAuxInt(8) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = int32ToAuxInt(8) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + 
v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [s] dst src mem) - // cond: s >= 13 && s <= 15 - // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedUint32x16 [a] x y mask) + // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 - mem := v_2 - if !(s >= 13 && s <= 15) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = int32ToAuxInt(int32(s - 8)) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v0.AuxInt = int32ToAuxInt(int32(s - 8)) - v0.AddArg2(src, mem) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v2.AddArg2(src, mem) - v1.AddArg3(dst, v2, mem) - v.AddArg3(dst, v0, v1) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [s] dst src mem) - // cond: s > 16 && s%16 != 0 && s%16 <= 8 - // result: (Move [s-s%16] (OffPtr dst [s%16]) (OffPtr src [s%16]) (MOVQstore dst (MOVQload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedUint32x4 [a] x y mask) + // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 - mem := v_2 - if !(s > 16 && s%16 != 0 && s%16 <= 8) { - break - } - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(s - s%16) - v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) - v0.AuxInt = int64ToAuxInt(s % 16) - v0.AddArg(dst) - v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) - v1.AuxInt = int64ToAuxInt(s % 16) - v1.AddArg(src) - v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem) - v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64) - v3.AddArg2(src, mem) - v2.AddArg3(dst, v3, mem) - v.AddArg3(v0, v1, v2) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [s] dst src mem) - // cond: s > 16 && s%16 != 0 && s%16 > 8 - // result: (Move [s-s%16] (OffPtr dst [s%16]) (OffPtr src [s%16]) (MOVOstore dst (MOVOload src mem) mem)) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedUint32x8 [a] x y mask) + // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 - mem := v_2 - if !(s > 16 && s%16 != 0 && s%16 > 8) { - break - } - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(s - s%16) - v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) - v0.AuxInt = int64ToAuxInt(s % 16) - v0.AddArg(dst) - v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) - v1.AuxInt = 
int64ToAuxInt(s % 16) - v1.AddArg(src) - v2 := b.NewValue0(v.Pos, OpAMD64MOVOstore, types.TypeMem) - v3 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128) - v3.AddArg2(src, mem) - v2.AddArg3(dst, v3, mem) - v.AddArg3(v0, v1, v2) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [s] dst src mem) - // cond: s > 64 && s <= 16*64 && s%16 == 0 && logLargeCopy(v, s) - // result: (DUFFCOPY [s] dst src mem) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedUint64x2 [a] x y mask) + // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 - mem := v_2 - if !(s > 64 && s <= 16*64 && s%16 == 0 && logLargeCopy(v, s)) { - break - } - v.reset(OpAMD64DUFFCOPY) - v.AuxInt = int64ToAuxInt(s) - v.AddArg3(dst, src, mem) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Move [s] dst src mem) - // cond: s > 16*64 && s%8 == 0 && logLargeCopy(v, s) - // result: (REPMOVSQ dst src (MOVQconst [s/8]) mem) +} +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftAndFillUpperFromMaskedUint64x4 [a] x y mask) + // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 - mem := v_2 - if !(s > 16*64 && s%8 == 0 && logLargeCopy(v, s)) { - break - } - v.reset(OpAMD64REPMOVSQ) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(s / 8) - v.AddArg4(dst, src, v0, mem) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpNeg32F(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Neg32F x) - // result: (PXOR x (MOVSSconst [float32(math.Copysign(0, -1))])) + // match: (ShiftAllLeftAndFillUpperFromMaskedUint64x8 [a] x y mask) + // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64PXOR) - v0 := b.NewValue0(v.Pos, OpAMD64MOVSSconst, typ.Float32) - v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1))) - v.AddArg2(x, v0) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNeg64F(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x16(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Neg64F x) - // result: (PXOR x (MOVSDconst 
[math.Copysign(0, -1)])) + // match: (ShiftAllLeftAndFillUpperFromUint16x16 [a] x y) + // result: (VPSHLDW256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64PXOR) - v0 := b.NewValue0(v.Pos, OpAMD64MOVSDconst, typ.Float64) - v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1)) - v.AddArg2(x, v0) + y := v_1 + v.reset(OpAMD64VPSHLDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNeq16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq16 x y) - // result: (SETNE (CMPW x y)) + // match: (ShiftAllLeftAndFillUpperFromUint16x32 [a] x y) + // result: (VPSHLDW512 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNeq32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq32 x y) - // result: (SETNE (CMPL x y)) + // match: (ShiftAllLeftAndFillUpperFromUint16x8 [a] x y) + // result: (VPSHLDW128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNeq32F(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq32F x y) - // result: (SETNEF (UCOMISS x y)) + // match: (ShiftAllLeftAndFillUpperFromUint32x16 [a] x y) + // result: (VPSHLDD512 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETNEF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNeq64(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq64 x y) - // result: (SETNE (CMPQ x y)) + // match: (ShiftAllLeftAndFillUpperFromUint32x4 [a] x y) + // result: (VPSHLDD128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNeq64F(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (Neq64F x y) - // result: (SETNEF (UCOMISD x y)) + // match: (ShiftAllLeftAndFillUpperFromUint32x8 [a] x y) + // result: (VPSHLDD256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETNEF) - v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNeq8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := 
v.Block - // match: (Neq8 x y) - // result: (SETNE (CMPB x y)) + // match: (ShiftAllLeftAndFillUpperFromUint64x2 [a] x y) + // result: (VPSHLDQ128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNeqB(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (NeqB x y) - // result: (SETNE (CMPB x y)) + // match: (ShiftAllLeftAndFillUpperFromUint64x4 [a] x y) + // result: (VPSHLDQ256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNeqPtr(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (NeqPtr x y) - // result: (SETNE (CMPQ x y)) + // match: (ShiftAllLeftAndFillUpperFromUint64x8 [a] x y) + // result: (VPSHLDQ512 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64SETNE) - v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHLDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNot(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Not x) - // result: (XORLconst [1] x) + b := v.Block + // match: (ShiftAllLeftMaskedInt64x2 x y mask) + // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 - v.reset(OpAMD64XORLconst) - v.AuxInt = int32ToAuxInt(1) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualFloat32x16 x y) - // result: (VPMOVMToVec32x16 (VCMPPS512 [4] x y)) + // match: (ShiftAllLeftMaskedInt64x4 x y mask) + // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NotEqualFloat32x4 x y) - // result: (VCMPPS128 [4] x y) + b := v.Block + // match: (ShiftAllLeftMaskedInt64x8 x y mask) + // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS128) - v.AuxInt = int8ToAuxInt(4) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NotEqualFloat32x8 x y) - // result: (VCMPPS256 [4] x y) + b := v.Block + // match: (ShiftAllLeftMaskedUint64x2 x y mask) + // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPS256) - v.AuxInt = int8ToAuxInt(4) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NotEqualFloat64x2 x y) - // result: (VCMPPD128 [4] x y) + b := v.Block + // match: (ShiftAllLeftMaskedUint64x4 x y mask) + // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPD128) - v.AuxInt = int8ToAuxInt(4) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (NotEqualFloat64x4 x y) - // result: (VCMPPD256 [4] x y) + b := v.Block + // match: (ShiftAllLeftMaskedUint64x8 x y mask) + // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64VCMPPD256) - v.AuxInt = int8ToAuxInt(4) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualFloat64x8 x y) - // result: (VPMOVMToVec64x8 (VCMPPD512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt16x16 [a] x y) + // result: (VPSHRDW256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt16x32 [a] x y) + // result: (VPSHRDW512 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x8(v *Value) bool { 
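// The ShiftAllLeftMasked rules just above (and the ShiftAllRightMasked and
// ShiftAllRightSignExtendedMasked rules further down, which lower to VPSRLQMasked*
// and VPSRAQMasked* respectively) use the same mask conversion but carry no
// immediate: the shift count is an ordinary second operand. A minimal sketch of
// that shape (hypothetical helper name, calls taken from the surrounding rules):
func lowerShiftAllMasked(v *Value, machineOp, maskCvtOp Op) bool {
	b := v.Block
	x := v.Args[0]
	y := v.Args[1] // shift count operand, not an AuxInt immediate
	mask := v.Args[2]
	v.reset(machineOp) // e.g. OpAMD64VPSLLQMasked128
	m := b.NewValue0(v.Pos, maskCvtOp, types.TypeMask)
	m.AddArg(mask)
	v.AddArg3(x, y, m)
	return true
}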
v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPW512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt16x8 [a] x y) + // result: (VPSHRDW128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt32x16 [a] x y) + // result: (VPSHRDD512 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPD512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt32x4 [a] x y) + // result: (VPSHRDD128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt32x8 [a] x y) + // result: (VPSHRDD256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDD256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt64x2 [a] x y) + // result: (VPSHRDQ128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := 
&b.Func.Config.Types - // match: (NotEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt64x4 [a] x y) + // result: (VPSHRDQ256 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDQ256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromInt64x8 [a] x y) + // result: (VPSHRDQ512 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + v.reset(OpAMD64VPSHRDQ512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPQ512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt16x16 [a] x y mask) + // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualInt8x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt16x32 [a] x y mask) + // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualInt8x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt16x8 [a] x y mask) + // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = 
int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPB512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt32x16 [a] x y mask) + // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt32x4 [a] x y mask) + // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint16x32 x y) - // result: (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt32x8 [a] x y mask) + // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt64x2 [a] x y mask) + // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := 
v_2 + v.reset(OpAMD64VPSHRDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint32x16 x y) - // result: (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt64x4 [a] x y mask) + // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedInt64x8 [a] x y mask) + // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedUint16x16 [a] x y mask) + // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedUint16x32 [a] x y mask) + // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = 
int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedUint16x8 [a] x y mask) + // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint64x8 x y) - // result: (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedUint32x16 [a] x y mask) + // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec64x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint8x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedUint32x4 [a] x y mask) + // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint8x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedUint32x8 [a] x y mask) + // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDDMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint8x64 x y) - // result: (VPMOVMToVec8x64 (VPCMPUB512 [4] x y)) + // match: (ShiftAllRightAndFillUpperFromMaskedUint64x2 [a] x y mask) + // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPMOVMToVec8x64) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask) - v0.AuxInt = int8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpOffPtr(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (OffPtr [off] ptr) - // cond: is32Bit(off) - // result: (ADDQconst [int32(off)] ptr) - for { - off := auxIntToInt64(v.AuxInt) - ptr := v_0 - if !(is32Bit(off)) { - break - } - v.reset(OpAMD64ADDQconst) - v.AuxInt = int32ToAuxInt(int32(off)) - v.AddArg(ptr) - return true - } - // match: (OffPtr [off] ptr) - // result: (ADDQ (MOVQconst [off]) ptr) + // match: (ShiftAllRightAndFillUpperFromMaskedUint64x4 [a] x y mask) + // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { - off := auxIntToInt64(v.AuxInt) - ptr := v_0 - v.reset(OpAMD64ADDQ) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(off) - v.AddArg2(v0, ptr) + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpPanicBounds(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (PanicBounds [kind] x y mem) - // cond: boundsABI(kind) == 0 - // result: (LoweredPanicBoundsA [kind] x y mem) + b := v.Block + // match: (ShiftAllRightAndFillUpperFromMaskedUint64x8 [a] x y mask) + // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { - kind := auxIntToInt64(v.AuxInt) + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mem := v_2 - if !(boundsABI(kind) == 0) { - break - } - v.reset(OpAMD64LoweredPanicBoundsA) - v.AuxInt = int64ToAuxInt(kind) - v.AddArg3(x, y, mem) + mask := v_2 + v.reset(OpAMD64VPSHRDQMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (PanicBounds [kind] x y mem) - // cond: boundsABI(kind) == 1 - // result: (LoweredPanicBoundsB [kind] x y mem) +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint16x16 [a] x y) + // result: (VPSHRDW256 [a] x y) for { - kind := auxIntToInt64(v.AuxInt) + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mem := v_2 - if !(boundsABI(kind) == 1) { - break - } - 
v.reset(OpAMD64LoweredPanicBoundsB) - v.AuxInt = int64ToAuxInt(kind) - v.AddArg3(x, y, mem) + v.reset(OpAMD64VPSHRDW256) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } - // match: (PanicBounds [kind] x y mem) - // cond: boundsABI(kind) == 2 - // result: (LoweredPanicBoundsC [kind] x y mem) +} +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightAndFillUpperFromUint16x32 [a] x y) + // result: (VPSHRDW512 [a] x y) for { - kind := auxIntToInt64(v.AuxInt) + a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mem := v_2 - if !(boundsABI(kind) == 2) { - break - } - v.reset(OpAMD64LoweredPanicBoundsC) - v.AuxInt = int64ToAuxInt(kind) - v.AddArg3(x, y, mem) + v.reset(OpAMD64VPSHRDW512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } - return false } -func rewriteValueAMD64_OpPopCount16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (PopCount16 x) - // result: (POPCNTL (MOVWQZX x)) + // match: (ShiftAllRightAndFillUpperFromUint16x8 [a] x y) + // result: (VPSHRDW128 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64POPCNTL) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + v.reset(OpAMD64VPSHRDW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpPopCount8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x16(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (PopCount8 x) - // result: (POPCNTL (MOVBQZX x)) + // match: (ShiftAllRightAndFillUpperFromUint32x16 [a] x y) + // result: (VPSHRDD512 [a] x y) for { + a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64POPCNTL) - v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + v.reset(OpAMD64VPSHRDD512) + v.AuxInt = int8ToAuxInt(a) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpRotateAllLeftInt32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x4(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftInt32x16 [a] x) - // result: (VPROLD512 [a] x) + // match: (ShiftAllRightAndFillUpperFromUint32x4 [a] x y) + // result: (VPSHRDD128 [a] x y) for { a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLD512) + y := v_1 + v.reset(OpAMD64VPSHRDD128) v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpRotateAllLeftInt32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftInt32x4 [a] x) - // result: (VPROLD128 [a] x) + // match: (ShiftAllRightAndFillUpperFromUint32x8 [a] x y) + // result: (VPSHRDD256 [a] x y) for { a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLD128) + y := v_1 + v.reset(OpAMD64VPSHRDD256) v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpRotateAllLeftInt32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x2(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftInt32x8 [a] x) - // result: (VPROLD256 [a] x) + // match: (ShiftAllRightAndFillUpperFromUint64x2 [a] x y) + // result: (VPSHRDQ128 [a] x y) for { a 
:= auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLD256) + y := v_1 + v.reset(OpAMD64VPSHRDQ128) v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpRotateAllLeftInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x4(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftInt64x2 [a] x) - // result: (VPROLQ128 [a] x) + // match: (ShiftAllRightAndFillUpperFromUint64x4 [a] x y) + // result: (VPSHRDQ256 [a] x y) for { a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLQ128) + y := v_1 + v.reset(OpAMD64VPSHRDQ256) v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpRotateAllLeftInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x8(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftInt64x4 [a] x) - // result: (VPROLQ256 [a] x) + // match: (ShiftAllRightAndFillUpperFromUint64x8 [a] x y) + // result: (VPSHRDQ512 [a] x y) for { a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLQ256) + y := v_1 + v.reset(OpAMD64VPSHRDQ512) v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpRotateAllLeftInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftInt64x8 [a] x) - // result: (VPROLQ512 [a] x) + b := v.Block + // match: (ShiftAllRightMaskedInt64x2 x y mask) + // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLQ512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllLeftUint32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftUint32x16 [a] x) - // result: (VPROLD512 [a] x) + b := v.Block + // match: (ShiftAllRightMaskedInt64x4 x y mask) + // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLD512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllLeftUint32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftUint32x4 [a] x) - // result: (VPROLD128 [a] x) + b := v.Block + // match: (ShiftAllRightMaskedInt64x8 x y mask) + // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllLeftUint32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftUint32x8 [a] x) - // result: (VPROLD256 
[a] x) + b := v.Block + // match: (ShiftAllRightMaskedUint64x2 x y mask) + // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLD256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllLeftUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftUint64x2 [a] x) - // result: (VPROLQ128 [a] x) + b := v.Block + // match: (ShiftAllRightMaskedUint64x4 x y mask) + // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllLeftUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftUint64x4 [a] x) - // result: (VPROLQ256 [a] x) + b := v.Block + // match: (ShiftAllRightMaskedUint64x8 x y mask) + // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLQ256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllLeftUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllLeftUint64x8 [a] x) - // result: (VPROLQ512 [a] x) + b := v.Block + // match: (ShiftAllRightSignExtendedMaskedInt64x2 x y mask) + // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPROLQ512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightInt32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightInt32x16 [a] x) - // result: (VPRORD512 [a] x) + b := v.Block + // match: (ShiftAllRightSignExtendedMaskedInt64x4 x y mask) + // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORD512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightInt32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightInt32x4 [a] x) - // result: (VPRORD128 [a] x) + b := v.Block + // match: 
(ShiftAllRightSignExtendedMaskedInt64x8 x y mask) + // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightInt32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightInt32x8 [a] x) - // result: (VPRORD256 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt16x16 x y z mask) + // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORD256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightInt64x2 [a] x) - // result: (VPRORQ128 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt16x32 x y z mask) + // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightInt64x4 [a] x) - // result: (VPRORQ256 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt16x8 x y z mask) + // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORQ256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightInt64x8 [a] x) - // result: (VPRORQ512 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt32x16 x y z mask) + // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORQ512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightUint32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x4(v 
*Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightUint32x16 [a] x) - // result: (VPRORD512 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt32x4 x y z mask) + // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORD512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightUint32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightUint32x4 [a] x) - // result: (VPRORD128 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt32x8 x y z mask) + // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightUint32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightUint32x8 [a] x) - // result: (VPRORD256 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt64x2 x y z mask) + // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORD256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightUint64x2 [a] x) - // result: (VPRORQ128 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt64x4 x y z mask) + // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightUint64x4 [a] x) - // result: (VPRORQ256 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedInt64x8 x y z mask) + // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORQ256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRotateAllRightUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RotateAllRightUint64x8 [a] x) - // result: (VPRORQ512 [a] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint16x16 x y z mask) + // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VPRORQ512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundFloat32x4 x) - // result: (VROUNDPS128 [0] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint16x32 x y z mask) + // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPS128) - v.AuxInt = int8ToAuxInt(0) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundFloat32x8 x) - // result: (VROUNDPS256 [0] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint16x8 x y z mask) + // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPS256) - v.AuxInt = int8ToAuxInt(0) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundFloat64x2 x) - // result: (VROUNDPD128 [0] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint32x16 x y z mask) + // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPD128) - v.AuxInt = int8ToAuxInt(0) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundFloat64x4 x) - // result: (VROUNDPD256 [0] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint32x4 x y z mask) + // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPD256) - v.AuxInt = int8ToAuxInt(0) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked128) + v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundToEven(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundToEven x) - // result: (ROUNDSD [0] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint32x8 x y z mask) + // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 - v.reset(OpAMD64ROUNDSD) - v.AuxInt = int8ToAuxInt(0) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundWithPrecisionFloat32x16 [a] x) - // result: (VRNDSCALEPS512 [a+0] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint64x2 x y z mask) + // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS512) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundWithPrecisionFloat32x4 [a] x) - // result: (VRNDSCALEPS128 [a+0] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint64x4 x y z mask) + // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS128) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundWithPrecisionFloat32x8 [a] x) - // result: (VRNDSCALEPS256 [a+0] x) + b := v.Block + // match: (ShiftLeftAndFillUpperFromMaskedUint64x8 x y z mask) + // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS256) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHLDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpRoundWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundWithPrecisionFloat64x2 [a] x) - // result: (VRNDSCALEPD128 [a+0] x) + b := v.Block + // match: (ShiftLeftMaskedInt16x16 x y mask) + // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) for 
{ - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD128) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundWithPrecisionFloat64x4 [a] x) - // result: (VRNDSCALEPD256 [a+0] x) + b := v.Block + // match: (ShiftLeftMaskedInt16x32 x y mask) + // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD256) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (RoundWithPrecisionFloat64x8 [a] x) - // result: (VRNDSCALEPD512 [a+0] x) + b := v.Block + // match: (ShiftLeftMaskedInt16x8 x y mask) + // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD512) - v.AuxInt = int8ToAuxInt(a + 0) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16Ux16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPWconst y [16]))) + // match: (ShiftLeftMaskedInt32x16 x y mask) + // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(16) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh16Ux16 x y) - // cond: shiftIsBounded(v) - // result: (SHRW x y) +} +func rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftLeftMaskedInt32x4 x y mask) + // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRW) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16Ux32(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16Ux32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL 
(SHRW x y) (SBBLcarrymask (CMPLconst y [16]))) + // match: (ShiftLeftMaskedInt32x8 x y mask) + // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(16) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh16Ux32 x y) - // cond: shiftIsBounded(v) - // result: (SHRW x y) +} +func rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftLeftMaskedInt64x2 x y mask) + // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRW) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16Ux64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPQconst y [16]))) + // match: (ShiftLeftMaskedInt64x4 x y mask) + // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(16) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh16Ux64 x y) - // cond: shiftIsBounded(v) - // result: (SHRW x y) +} +func rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftLeftMaskedInt64x8 x y mask) + // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRW) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16Ux8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRW x y) (SBBLcarrymask (CMPBconst y [16]))) + // match: (ShiftLeftMaskedUint16x16 x y mask) + // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, 
types.TypeFlags) - v2.AuxInt = int8ToAuxInt(16) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh16Ux8 x y) - // cond: shiftIsBounded(v) - // result: (SHRW x y) +} +func rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftLeftMaskedUint16x32 x y mask) + // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRW) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16x16 x y) - // cond: !shiftIsBounded(v) - // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [16]))))) + // match: (ShiftLeftMaskedUint16x8 x y mask) + // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v3.AuxInt = int16ToAuxInt(16) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSLLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh16x16 x y) - // cond: shiftIsBounded(v) - // result: (SARW x y) +} +func rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftLeftMaskedUint32x16 x y mask) + // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16x32 x y) - // cond: !shiftIsBounded(v) - // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [16]))))) + // match: (ShiftLeftMaskedUint32x4 x y mask) + // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(16) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + 
v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh16x32 x y) - // cond: shiftIsBounded(v) - // result: (SARW x y) +} +func rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftLeftMaskedUint32x8 x y mask) + // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16x64(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16x64 x y) - // cond: !shiftIsBounded(v) - // result: (SARW x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [16]))))) + // match: (ShiftLeftMaskedUint64x2 x y mask) + // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(16) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh16x64 x y) - // cond: shiftIsBounded(v) - // result: (SARW x y) +} +func rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftLeftMaskedUint64x4 x y mask) + // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh16x8 x y) - // cond: !shiftIsBounded(v) - // result: (SARW x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [16]))))) + // match: (ShiftLeftMaskedUint64x8 x y mask) + // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v3.AuxInt = int8ToAuxInt(16) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSLLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh16x8 x y) - // cond: shiftIsBounded(v) - // result: (SARW x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 
:= v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedInt16x16 x y z mask) + // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARW) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32Ux16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32Ux16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPWconst y [32]))) + // match: (ShiftRightAndFillUpperFromMaskedInt16x32 x y z mask) + // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Rsh32Ux16 x y) - // cond: shiftIsBounded(v) - // result: (SHRL x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedInt16x8 x y z mask) + // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32Ux32(v *Value) bool { +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32Ux32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPLconst y [32]))) + // match: (ShiftRightAndFillUpperFromMaskedInt32x16 x y z mask) + // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Rsh32Ux32 x y) - // cond: shiftIsBounded(v) - // result: (SHRL x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedInt32x4 x y 
z mask) + // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool { +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32Ux64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPQconst y [32]))) + // match: (ShiftRightAndFillUpperFromMaskedInt32x8 x y z mask) + // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Rsh32Ux64 x y) - // cond: shiftIsBounded(v) - // result: (SHRL x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedInt64x2 x y z mask) + // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32Ux8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRL x y) (SBBLcarrymask (CMPBconst y [32]))) + // match: (ShiftRightAndFillUpperFromMaskedInt64x4 x y z mask) + // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(32) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Rsh32Ux8 x y) - // cond: shiftIsBounded(v) - // result: (SHRL x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedInt64x8 x y z mask) + // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - 
v.reset(OpAMD64SHRL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32x16 x y) - // cond: !shiftIsBounded(v) - // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [32]))))) + // match: (ShiftRightAndFillUpperFromMaskedUint16x16 x y z mask) + // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v3.AuxInt = int16ToAuxInt(32) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Rsh32x16 x y) - // cond: shiftIsBounded(v) - // result: (SARL x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedUint16x32 x y z mask) + // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32x32(v *Value) bool { +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32x32 x y) - // cond: !shiftIsBounded(v) - // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [32]))))) + // match: (ShiftRightAndFillUpperFromMaskedUint16x8 x y z mask) + // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(32) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Rsh32x32 x y) - // cond: shiftIsBounded(v) - // result: (SARL x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedUint32x16 x y z mask) + // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) for 
{ x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32x64(v *Value) bool { +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32x64 x y) - // cond: !shiftIsBounded(v) - // result: (SARL x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [32]))))) + // match: (ShiftRightAndFillUpperFromMaskedUint32x4 x y z mask) + // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(32) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Rsh32x64 x y) - // cond: shiftIsBounded(v) - // result: (SARL x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedUint32x8 x y z mask) + // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) + return true + } +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x2(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedUint64x2 x y z mask) + // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh32x8 x y) - // cond: !shiftIsBounded(v) - // result: (SARL x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [32]))))) + // match: (ShiftRightAndFillUpperFromMaskedUint64x4 x y z mask) + // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v3.AuxInt = int8ToAuxInt(32) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) 
+ z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - // match: (Rsh32x8 x y) - // cond: shiftIsBounded(v) - // result: (SARL x y) +} +func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightAndFillUpperFromMaskedUint64x8 x y z mask) + // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARL) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPSHRDVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64Ux16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh64Ux16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPWconst y [64]))) + // match: (ShiftRightMaskedInt16x16 x y mask) + // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh64Ux16 x y) - // cond: shiftIsBounded(v) - // result: (SHRQ x y) +} +func rewriteValueAMD64_OpShiftRightMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedInt16x32 x y mask) + // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64Ux32(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh64Ux32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPLconst y [64]))) + // match: (ShiftRightMaskedInt16x8 x y mask) + // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh64Ux32 x y) - // cond: shiftIsBounded(v) - // result: (SHRQ x y) +} +func rewriteValueAMD64_OpShiftRightMaskedInt32x16(v *Value) bool { + 
v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedInt32x16 x y mask) + // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh64Ux64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPQconst y [64]))) + // match: (ShiftRightMaskedInt32x4 x y mask) + // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh64Ux64 x y) - // cond: shiftIsBounded(v) - // result: (SHRQ x y) +} +func rewriteValueAMD64_OpShiftRightMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedInt32x8 x y mask) + // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh64Ux8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDQ (SHRQ x y) (SBBQcarrymask (CMPBconst y [64]))) + // match: (ShiftRightMaskedInt64x2 x y mask) + // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDQ) - v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(64) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh64Ux8 x y) - // cond: shiftIsBounded(v) - // result: (SHRQ x y) +} +func rewriteValueAMD64_OpShiftRightMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedInt64x4 x y mask) + // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRQ) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + 
v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64x16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh64x16 x y) - // cond: !shiftIsBounded(v) - // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [64]))))) + // match: (ShiftRightMaskedInt64x8 x y mask) + // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v3.AuxInt = int16ToAuxInt(64) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh64x16 x y) - // cond: shiftIsBounded(v) - // result: (SARQ x y) +} +func rewriteValueAMD64_OpShiftRightMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedUint16x16 x y mask) + // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64x32(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh64x32 x y) - // cond: !shiftIsBounded(v) - // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [64]))))) + // match: (ShiftRightMaskedUint16x32 x y mask) + // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(64) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh64x32 x y) - // cond: shiftIsBounded(v) - // result: (SARQ x y) +} +func rewriteValueAMD64_OpShiftRightMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedUint16x8 x y mask) + // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64x64(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b 
:= v.Block - // match: (Rsh64x64 x y) - // cond: !shiftIsBounded(v) - // result: (SARQ x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [64]))))) + // match: (ShiftRightMaskedUint32x16 x y mask) + // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(64) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh64x64 x y) - // cond: shiftIsBounded(v) - // result: (SARQ x y) +} +func rewriteValueAMD64_OpShiftRightMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedUint32x4 x y mask) + // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh64x8 x y) - // cond: !shiftIsBounded(v) - // result: (SARQ x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [64]))))) + // match: (ShiftRightMaskedUint32x8 x y mask) + // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARQ) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v3.AuxInt = int8ToAuxInt(64) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSRLVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh64x8 x y) - // cond: shiftIsBounded(v) - // result: (SARQ x y) +} +func rewriteValueAMD64_OpShiftRightMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedUint64x2 x y mask) + // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARQ) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh8Ux16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh8Ux16 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPWconst y [8]))) + // match: (ShiftRightMaskedUint64x4 x y mask) + // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM 
mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v2.AuxInt = int16ToAuxInt(8) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh8Ux16 x y) - // cond: shiftIsBounded(v) - // result: (SHRB x y) +} +func rewriteValueAMD64_OpShiftRightMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightMaskedUint64x8 x y mask) + // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRB) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRLVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh8Ux32(v *Value) bool { +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh8Ux32 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPLconst y [8]))) + // match: (ShiftRightSignExtendedMaskedInt16x16 x y mask) + // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(8) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh8Ux32 x y) - // cond: shiftIsBounded(v) - // result: (SHRB x y) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedInt16x32 x y mask) + // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRB) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool { +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh8Ux64 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPQconst y [8]))) + // match: (ShiftRightSignExtendedMaskedInt16x8 x y mask) + // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v2.AuxInt = int32ToAuxInt(8) - v2.AddArg(y) - v1.AddArg(v2) 
- v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh8Ux64 x y) - // cond: shiftIsBounded(v) - // result: (SHRB x y) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedInt32x16 x y mask) + // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRB) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh8Ux8 x y) - // cond: !shiftIsBounded(v) - // result: (ANDL (SHRB x y) (SBBLcarrymask (CMPBconst y [8]))) + // match: (ShiftRightSignExtendedMaskedInt32x4 x y mask) + // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64ANDL) - v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t) - v0.AddArg2(x, y) - v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t) - v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v2.AuxInt = int8ToAuxInt(8) - v2.AddArg(y) - v1.AddArg(v2) - v.AddArg2(v0, v1) + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh8Ux8 x y) - // cond: shiftIsBounded(v) - // result: (SHRB x y) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedInt32x8 x y mask) + // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SHRB) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh8x16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh8x16 x y) - // cond: !shiftIsBounded(v) - // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPWconst y [8]))))) + // match: (ShiftRightSignExtendedMaskedInt64x2 x y mask) + // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARB) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags) - v3.AuxInt = int16ToAuxInt(8) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh8x16 x y) - // cond: 
shiftIsBounded(v) - // result: (SARB x y) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedInt64x4 x y mask) + // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARB) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh8x32(v *Value) bool { +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh8x32 x y) - // cond: !shiftIsBounded(v) - // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPLconst y [8]))))) + // match: (ShiftRightSignExtendedMaskedInt64x8 x y mask) + // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARB) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(8) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh8x32 x y) - // cond: shiftIsBounded(v) - // result: (SARB x y) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedUint16x16 x y mask) + // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARB) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh8x64(v *Value) bool { +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh8x64 x y) - // cond: !shiftIsBounded(v) - // result: (SARB x (ORQ y (NOTQ (SBBQcarrymask (CMPQconst y [8]))))) + // match: (ShiftRightSignExtendedMaskedUint16x32 x y mask) + // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARB) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags) - v3.AuxInt = int32ToAuxInt(8) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh8x64 x y) - // cond: shiftIsBounded(v) - // result: (SARB x y) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x8(v *Value) bool { + v_2 := 
v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedUint16x8 x y mask) + // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARB) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpRsh8x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Rsh8x8 x y) - // cond: !shiftIsBounded(v) - // result: (SARB x (ORL y (NOTL (SBBLcarrymask (CMPBconst y [8]))))) + // match: (ShiftRightSignExtendedMaskedUint32x16 x y mask) + // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 y := v_1 - if !(!shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARB) - v.Type = t - v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type) - v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type) - v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type) - v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags) - v3.AuxInt = int8ToAuxInt(8) - v3.AddArg(y) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(y, v1) - v.AddArg2(x, v0) + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Rsh8x8 x y) - // cond: shiftIsBounded(v) - // result: (SARB x y) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedUint32x4 x y mask) + // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - if !(shiftIsBounded(v)) { - break - } - v.reset(OpAMD64SARB) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpSelect0(v *Value) bool { +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Select0 (Mul64uover x y)) - // result: (Select0 (MULQU x y)) + // match: (ShiftRightSignExtendedMaskedUint32x8 x y mask) + // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) for { - if v_0.Op != OpMul64uover { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - v.reset(OpSelect0) - v.Type = typ.UInt64 - v0 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags)) - v0.AddArg2(x, y) - v.AddArg(v0) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Select0 (Mul32uover x y)) - // result: (Select0 (MULLU x y)) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedUint64x2 x y mask) + // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) for { - if v_0.Op != OpMul32uover { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - v.reset(OpSelect0) - v.Type = typ.UInt32 - v0 := b.NewValue0(v.Pos, 
OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags)) - v0.AddArg2(x, y) - v.AddArg(v0) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Select0 (Add64carry x y c)) - // result: (Select0 (ADCQ x y (Select1 (NEGLflags c)))) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedUint64x4 x y mask) + // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) for { - if v_0.Op != OpAdd64carry { - break - } - c := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - v.reset(OpSelect0) - v.Type = typ.UInt64 - v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) - v2.AddArg(c) - v1.AddArg(v2) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Select0 (Sub64borrow x y c)) - // result: (Select0 (SBBQ x y (Select1 (NEGLflags c)))) +} +func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftRightSignExtendedMaskedUint64x8 x y mask) + // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) for { - if v_0.Op != OpSub64borrow { - break - } - c := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - v.reset(OpSelect0) - v.Type = typ.UInt64 - v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) - v2.AddArg(c) - v1.AddArg(v2) - v0.AddArg3(x, y, v1) - v.AddArg(v0) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAVQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - // match: (Select0 (AddTupleFirst32 val tuple)) - // result: (ADDL val (Select0 tuple)) +} +func rewriteValueAMD64_OpSlicemask(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (Slicemask x) + // result: (SARQconst (NEGQ x) [63]) for { t := v.Type - if v_0.Op != OpAMD64AddTupleFirst32 { - break - } - tuple := v_0.Args[1] - val := v_0.Args[0] - v.reset(OpAMD64ADDL) - v0 := b.NewValue0(v.Pos, OpSelect0, t) - v0.AddArg(tuple) - v.AddArg2(val, v0) + x := v_0 + v.reset(OpAMD64SARQconst) + v.AuxInt = int8ToAuxInt(63) + v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) + v0.AddArg(x) + v.AddArg(v0) return true } - // match: (Select0 (AddTupleFirst64 val tuple)) - // result: (ADDQ val (Select0 tuple)) +} +func rewriteValueAMD64_OpSpectreIndex(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SpectreIndex x y) + // result: (CMOVQCC x (MOVQconst [0]) (CMPQ x y)) for { - t := v.Type - if v_0.Op != OpAMD64AddTupleFirst64 { - break - } - tuple := v_0.Args[1] - val := v_0.Args[0] - v.reset(OpAMD64ADDQ) - v0 := b.NewValue0(v.Pos, OpSelect0, t) - v0.AddArg(tuple) - v.AddArg2(val, v0) + x := v_0 + y := v_1 + v.reset(OpAMD64CMOVQCC) + v0 := b.NewValue0(v.Pos, 
OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v1.AddArg2(x, y) + v.AddArg3(x, v0, v1) return true } - // match: (Select0 a:(ADDQconstflags [c] x)) - // cond: a.Uses == 1 - // result: (ADDQconst [c] x) +} +func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (SpectreSliceIndex x y) + // result: (CMOVQHI x (MOVQconst [0]) (CMPQ x y)) for { - a := v_0 - if a.Op != OpAMD64ADDQconstflags { - break - } - c := auxIntToInt32(a.AuxInt) - x := a.Args[0] - if !(a.Uses == 1) { - break - } - v.reset(OpAMD64ADDQconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) + x := v_0 + y := v_1 + v.reset(OpAMD64CMOVQHI) + v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(0) + v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) + v1.AddArg2(x, y) + v.AddArg3(x, v0, v1) return true } - // match: (Select0 a:(ADDLconstflags [c] x)) - // cond: a.Uses == 1 - // result: (ADDLconst [c] x) +} +func rewriteValueAMD64_OpSqrtMaskedFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SqrtMaskedFloat32x16 x mask) + // result: (VSQRTPSMasked512 x (VPMOVVec32x16ToM mask)) for { - a := v_0 - if a.Op != OpAMD64ADDLconstflags { - break - } - c := auxIntToInt32(a.AuxInt) - x := a.Args[0] - if !(a.Uses == 1) { - break - } - v.reset(OpAMD64ADDLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) + x := v_0 + mask := v_1 + v.reset(OpAMD64VSQRTPSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - return false } -func rewriteValueAMD64_OpSelect1(v *Value) bool { +func rewriteValueAMD64_OpSqrtMaskedFloat32x4(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (Select1 (Mul64uover x y)) - // result: (SETO (Select1 (MULQU x y))) - for { - if v_0.Op != OpMul64uover { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - v.reset(OpAMD64SETO) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v1 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags)) - v1.AddArg2(x, y) - v0.AddArg(v1) - v.AddArg(v0) + // match: (SqrtMaskedFloat32x4 x mask) + // result: (VSQRTPSMasked128 x (VPMOVVec32x4ToM mask)) + for { + x := v_0 + mask := v_1 + v.reset(OpAMD64VSQRTPSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Select1 (Mul32uover x y)) - // result: (SETO (Select1 (MULLU x y))) +} +func rewriteValueAMD64_OpSqrtMaskedFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SqrtMaskedFloat32x8 x mask) + // result: (VSQRTPSMasked256 x (VPMOVVec32x8ToM mask)) for { - if v_0.Op != OpMul32uover { - break - } - y := v_0.Args[1] - x := v_0.Args[0] - v.reset(OpAMD64SETO) - v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v1 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags)) - v1.AddArg2(x, y) - v0.AddArg(v1) - v.AddArg(v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VSQRTPSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Select1 (Add64carry x y c)) - // result: (NEGQ (SBBQcarrymask (Select1 (ADCQ x y (Select1 (NEGLflags c)))))) +} +func rewriteValueAMD64_OpSqrtMaskedFloat64x2(v *Value) bool { + v_1 := 
v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SqrtMaskedFloat64x2 x mask) + // result: (VSQRTPDMasked128 x (VPMOVVec64x2ToM mask)) for { - if v_0.Op != OpAdd64carry { - break - } - c := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - v.reset(OpAMD64NEGQ) - v.Type = typ.UInt64 - v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64) - v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) - v4.AddArg(c) - v3.AddArg(v4) - v2.AddArg3(x, y, v3) - v1.AddArg(v2) - v0.AddArg(v1) - v.AddArg(v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VSQRTPDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Select1 (Sub64borrow x y c)) - // result: (NEGQ (SBBQcarrymask (Select1 (SBBQ x y (Select1 (NEGLflags c)))))) +} +func rewriteValueAMD64_OpSqrtMaskedFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SqrtMaskedFloat64x4 x mask) + // result: (VSQRTPDMasked256 x (VPMOVVec64x4ToM mask)) for { - if v_0.Op != OpSub64borrow { - break - } - c := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - v.reset(OpAMD64NEGQ) - v.Type = typ.UInt64 - v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64) - v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags)) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags)) - v4.AddArg(c) - v3.AddArg(v4) - v2.AddArg3(x, y, v3) - v1.AddArg(v2) - v0.AddArg(v1) - v.AddArg(v0) + x := v_0 + mask := v_1 + v.reset(OpAMD64VSQRTPDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Select1 (NEGLflags (MOVQconst [0]))) - // result: (FlagEQ) +} +func rewriteValueAMD64_OpSqrtMaskedFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (SqrtMaskedFloat64x8 x mask) + // result: (VSQRTPDMasked512 x (VPMOVVec64x8ToM mask)) for { - if v_0.Op != OpAMD64NEGLflags { - break - } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 0 { - break - } - v.reset(OpAMD64FlagEQ) + x := v_0 + mask := v_1 + v.reset(OpAMD64VSQRTPDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } - // match: (Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) - // result: x +} +func rewriteValueAMD64_OpStore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (Store {t} ptr val mem) + // cond: t.Size() == 8 && t.IsFloat() + // result: (MOVSDstore ptr val mem) for { - if v_0.Op != OpAMD64NEGLflags { - break - } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64NEGQ { - break - } - v_0_0_0 := v_0_0.Args[0] - if v_0_0_0.Op != OpAMD64SBBQcarrymask { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 8 && t.IsFloat()) { break } - x := v_0_0_0.Args[0] - v.copyOf(x) + v.reset(OpAMD64MOVSDstore) + v.AddArg3(ptr, val, mem) return true } - // match: (Select1 (AddTupleFirst32 _ tuple)) - // result: (Select1 tuple) + // match: (Store {t} ptr val mem) + // cond: t.Size() == 4 && t.IsFloat() + // result: 
(MOVSSstore ptr val mem) for { - if v_0.Op != OpAMD64AddTupleFirst32 { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 4 && t.IsFloat()) { break } - tuple := v_0.Args[1] - v.reset(OpSelect1) - v.AddArg(tuple) + v.reset(OpAMD64MOVSSstore) + v.AddArg3(ptr, val, mem) return true } - // match: (Select1 (AddTupleFirst64 _ tuple)) - // result: (Select1 tuple) + // match: (Store {t} ptr val mem) + // cond: t.Size() == 8 && !t.IsFloat() + // result: (MOVQstore ptr val mem) for { - if v_0.Op != OpAMD64AddTupleFirst64 { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 8 && !t.IsFloat()) { break } - tuple := v_0.Args[1] - v.reset(OpSelect1) - v.AddArg(tuple) + v.reset(OpAMD64MOVQstore) + v.AddArg3(ptr, val, mem) return true } - // match: (Select1 a:(LoweredAtomicAnd64 ptr val mem)) - // cond: a.Uses == 1 && clobber(a) - // result: (ANDQlock ptr val mem) + // match: (Store {t} ptr val mem) + // cond: t.Size() == 4 && !t.IsFloat() + // result: (MOVLstore ptr val mem) for { - a := v_0 - if a.Op != OpAMD64LoweredAtomicAnd64 { - break - } - mem := a.Args[2] - ptr := a.Args[0] - val := a.Args[1] - if !(a.Uses == 1 && clobber(a)) { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 4 && !t.IsFloat()) { break } - v.reset(OpAMD64ANDQlock) + v.reset(OpAMD64MOVLstore) v.AddArg3(ptr, val, mem) return true } - // match: (Select1 a:(LoweredAtomicAnd32 ptr val mem)) - // cond: a.Uses == 1 && clobber(a) - // result: (ANDLlock ptr val mem) + // match: (Store {t} ptr val mem) + // cond: t.Size() == 2 + // result: (MOVWstore ptr val mem) for { - a := v_0 - if a.Op != OpAMD64LoweredAtomicAnd32 { - break - } - mem := a.Args[2] - ptr := a.Args[0] - val := a.Args[1] - if !(a.Uses == 1 && clobber(a)) { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 2) { break } - v.reset(OpAMD64ANDLlock) + v.reset(OpAMD64MOVWstore) v.AddArg3(ptr, val, mem) return true } - // match: (Select1 a:(LoweredAtomicOr64 ptr val mem)) - // cond: a.Uses == 1 && clobber(a) - // result: (ORQlock ptr val mem) + // match: (Store {t} ptr val mem) + // cond: t.Size() == 1 + // result: (MOVBstore ptr val mem) for { - a := v_0 - if a.Op != OpAMD64LoweredAtomicOr64 { - break - } - mem := a.Args[2] - ptr := a.Args[0] - val := a.Args[1] - if !(a.Uses == 1 && clobber(a)) { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 1) { break } - v.reset(OpAMD64ORQlock) + v.reset(OpAMD64MOVBstore) v.AddArg3(ptr, val, mem) return true } - // match: (Select1 a:(LoweredAtomicOr32 ptr val mem)) - // cond: a.Uses == 1 && clobber(a) - // result: (ORLlock ptr val mem) + // match: (Store {t} ptr val mem) + // cond: t.Size() == 16 + // result: (VMOVDQUstore128 ptr val mem) for { - a := v_0 - if a.Op != OpAMD64LoweredAtomicOr32 { - break - } - mem := a.Args[2] - ptr := a.Args[0] - val := a.Args[1] - if !(a.Uses == 1 && clobber(a)) { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 16) { break } - v.reset(OpAMD64ORLlock) + v.reset(OpAMD64VMOVDQUstore128) v.AddArg3(ptr, val, mem) return true } - return false -} -func rewriteValueAMD64_OpSelectN(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem))))) - // cond: sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, 
sc.Val64(), config) && clobber(s1, s2, s3, call) - // result: (Move [sc.Val64()] dst src mem) + // match: (Store {t} ptr val mem) + // cond: t.Size() == 32 + // result: (VMOVDQUstore256 ptr val mem) for { - if auxIntToInt64(v.AuxInt) != 0 { - break - } - call := v_0 - if call.Op != OpAMD64CALLstatic || len(call.Args) != 1 { - break - } - sym := auxToCall(call.Aux) - s1 := call.Args[0] - if s1.Op != OpAMD64MOVQstoreconst { - break - } - sc := auxIntToValAndOff(s1.AuxInt) - _ = s1.Args[1] - s2 := s1.Args[1] - if s2.Op != OpAMD64MOVQstore { - break - } - _ = s2.Args[2] - src := s2.Args[1] - s3 := s2.Args[2] - if s3.Op != OpAMD64MOVQstore { - break - } - mem := s3.Args[2] - dst := s3.Args[1] - if !(sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)) { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 32) { break } - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(sc.Val64()) - v.AddArg3(dst, src, mem) + v.reset(OpAMD64VMOVDQUstore256) + v.AddArg3(ptr, val, mem) return true } - // match: (SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem)) - // cond: sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call) - // result: (Move [sz] dst src mem) + // match: (Store {t} ptr val mem) + // cond: t.Size() == 64 + // result: (VMOVDQUstore512 ptr val mem) for { - if auxIntToInt64(v.AuxInt) != 0 { - break - } - call := v_0 - if call.Op != OpAMD64CALLstatic || len(call.Args) != 4 { - break - } - sym := auxToCall(call.Aux) - mem := call.Args[3] - dst := call.Args[0] - src := call.Args[1] - call_2 := call.Args[2] - if call_2.Op != OpAMD64MOVQconst { - break - } - sz := auxIntToInt64(call_2.AuxInt) - if !(sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)) { + t := auxToType(v.Aux) + ptr := v_0 + val := v_1 + mem := v_2 + if !(t.Size() == 64) { break } - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(sz) - v.AddArg3(dst, src, mem) + v.reset(OpAMD64VMOVDQUstore512) + v.AddArg3(ptr, val, mem) return true } return false } -func rewriteValueAMD64_OpSet128Float32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (Set128Float32x8 [a] x y) - // result: (VINSERTF128256 [a] x y) - for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSet128Float64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (Set128Float64x4 [a] x y) - // result: (VINSERTF128256 [a] x y) - for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSet128Int16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (Set128Int16x16 [a] x y) - // result: (VINSERTI128256 [a] x y) - for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSet128Int32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (Set128Int32x8 [a] x y) - // result: (VINSERTI128256 [a] x y) - for { - a := auxIntToInt8(v.AuxInt) - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) - 
return true - } -} -func rewriteValueAMD64_OpSet128Int64x4(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Set128Int64x4 [a] x y) - // result: (VINSERTI128256 [a] x y) + b := v.Block + // match: (SubMaskedFloat32x16 x y mask) + // result: (VSUBPSMasked512 x y (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VSUBPSMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSet128Int8x32(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedFloat32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Set128Int8x32 [a] x y) - // result: (VINSERTI128256 [a] x y) + b := v.Block + // match: (SubMaskedFloat32x4 x y mask) + // result: (VSUBPSMasked128 x y (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VSUBPSMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSet128Uint16x16(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedFloat32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Set128Uint16x16 [a] x y) - // result: (VINSERTI128256 [a] x y) + b := v.Block + // match: (SubMaskedFloat32x8 x y mask) + // result: (VSUBPSMasked256 x y (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VSUBPSMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSet128Uint32x8(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedFloat64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Set128Uint32x8 [a] x y) - // result: (VINSERTI128256 [a] x y) + b := v.Block + // match: (SubMaskedFloat64x2 x y mask) + // result: (VSUBPDMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VSUBPDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSet128Uint64x4(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedFloat64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Set128Uint64x4 [a] x y) - // result: (VINSERTI128256 [a] x y) + b := v.Block + // match: (SubMaskedFloat64x4 x y mask) + // result: (VSUBPDMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VSUBPDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSet128Uint8x32(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedFloat64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: 
(Set128Uint8x32 [a] x y) - // result: (VINSERTI128256 [a] x y) + b := v.Block + // match: (SubMaskedFloat64x8 x y mask) + // result: (VSUBPDMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VSUBPDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSetElemInt16x8(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetElemInt16x8 [a] x y) - // result: (VPINSRW128 [a] x y) + b := v.Block + // match: (SubMaskedInt16x16 x y mask) + // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPINSRW128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSetElemInt32x4(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetElemInt32x4 [a] x y) - // result: (VPINSRD128 [a] x y) + b := v.Block + // match: (SubMaskedInt16x32 x y mask) + // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPINSRD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSetElemInt64x2(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetElemInt64x2 [a] x y) - // result: (VPINSRQ128 [a] x y) + b := v.Block + // match: (SubMaskedInt16x8 x y mask) + // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPINSRQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSetElemInt8x16(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetElemInt8x16 [a] x y) - // result: (VPINSRB128 [a] x y) + b := v.Block + // match: (SubMaskedInt32x16 x y mask) + // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPINSRB128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSetElemUint16x8(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetElemUint16x8 [a] x y) - // result: (VPINSRW128 [a] x y) + b := v.Block + // match: (SubMaskedInt32x4 x y mask) + // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - 
v.reset(OpAMD64VPINSRW128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSetElemUint32x4(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetElemUint32x4 [a] x y) - // result: (VPINSRD128 [a] x y) + b := v.Block + // match: (SubMaskedInt32x8 x y mask) + // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPINSRD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSetElemUint64x2(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetElemUint64x2 [a] x y) - // result: (VPINSRQ128 [a] x y) + b := v.Block + // match: (SubMaskedInt64x2 x y mask) + // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPINSRQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpSetElemUint8x16(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetElemUint8x16 [a] x y) - // result: (VPINSRB128 [a] x y) + b := v.Block + // match: (SubMaskedInt64x4 x y mask) + // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPINSRB128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x16(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt16x16 [a] x y) - // result: (VPSHLDW256 [a] x y) + b := v.Block + // match: (SubMaskedInt64x8 x y mask) + // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDW256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x32(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt16x32 [a] x y) - // result: (VPSHLDW512 [a] x y) + b := v.Block + // match: (SubMaskedInt8x16 x y mask) + // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDW512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt16x8(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt16x8 [a] x y) - // result: (VPSHLDW128 [a] x y) + b := v.Block + // match: (SubMaskedInt8x32 x y mask) + // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDW128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x16(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedInt8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt32x16 [a] x y) - // result: (VPSHLDD512 [a] x y) + b := v.Block + // match: (SubMaskedInt8x64 x y mask) + // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDD512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x4(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt32x4 [a] x y) - // result: (VPSHLDD128 [a] x y) + b := v.Block + // match: (SubMaskedUint16x16 x y mask) + // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt32x8(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt32x8 [a] x y) - // result: (VPSHLDD256 [a] x y) + b := v.Block + // match: (SubMaskedUint16x32 x y mask) + // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDD256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x2(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt64x2 [a] x y) - // result: (VPSHLDQ128 [a] x y) + b := v.Block + // match: (SubMaskedUint16x8 x y mask) + // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, 
y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x4(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt64x4 [a] x y) - // result: (VPSHLDQ256 [a] x y) + b := v.Block + // match: (SubMaskedUint32x16 x y mask) + // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDQ256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromInt64x8(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromInt64x8 [a] x y) - // result: (VPSHLDQ512 [a] x y) + b := v.Block + // match: (SubMaskedUint32x4 x y mask) + // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDQ512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x16(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint16x16 [a] x y) - // result: (VPSHLDW256 [a] x y) + b := v.Block + // match: (SubMaskedUint32x8 x y mask) + // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDW256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x32(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint16x32 [a] x y) - // result: (VPSHLDW512 [a] x y) + b := v.Block + // match: (SubMaskedUint64x2 x y mask) + // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDW512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint16x8(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint16x8 [a] x y) - // result: (VPSHLDW128 [a] x y) + b := v.Block + // match: (SubMaskedUint64x4 x y mask) + // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDW128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func 
rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x16(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint32x16 [a] x y) - // result: (VPSHLDD512 [a] x y) + b := v.Block + // match: (SubMaskedUint64x8 x y mask) + // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDD512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x4(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint8x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint32x4 [a] x y) - // result: (VPSHLDD128 [a] x y) + b := v.Block + // match: (SubMaskedUint8x16 x y mask) + // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBBMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint32x8(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint8x32(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint32x8 [a] x y) - // result: (VPSHLDD256 [a] x y) + b := v.Block + // match: (SubMaskedUint8x32 x y mask) + // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDD256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBBMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x2(v *Value) bool { +func rewriteValueAMD64_OpSubMaskedUint8x64(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint64x2 [a] x y) - // result: (VPSHLDQ128 [a] x y) + b := v.Block + // match: (SubMaskedUint8x64 x y mask) + // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHLDQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_2 + v.reset(OpAMD64VPSUBBMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTrunc(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint64x4 [a] x y) - // result: (VPSHLDQ256 [a] x y) + // match: (Trunc x) + // result: (ROUNDSD [3] x) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHLDQ256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64ROUNDSD) + v.AuxInt = int8ToAuxInt(3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromUint64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllLeftAndFillUpperFromUint64x8 
[a] x y) - // result: (VPSHLDQ512 [a] x y) + // match: (TruncFloat32x4 x) + // result: (VROUNDPS128 [3] x) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHLDQ512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VROUNDPS128) + v.AuxInt = int8ToAuxInt(3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt16x16 [a] x y) - // result: (VPSHRDW256 [a] x y) + // match: (TruncFloat32x8 x) + // result: (VROUNDPS256 [3] x) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDW256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VROUNDPS256) + v.AuxInt = int8ToAuxInt(3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x32(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt16x32 [a] x y) - // result: (VPSHRDW512 [a] x y) + // match: (TruncFloat64x2 x) + // result: (VROUNDPD128 [3] x) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDW512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VROUNDPD128) + v.AuxInt = int8ToAuxInt(3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt16x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncFloat64x4(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt16x8 [a] x y) - // result: (VPSHRDW128 [a] x y) + // match: (TruncFloat64x4 x) + // result: (VROUNDPD256 [3] x) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDW128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VROUNDPD256) + v.AuxInt = int8ToAuxInt(3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x16(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncWithPrecisionFloat32x16(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt32x16 [a] x y) - // result: (VPSHRDD512 [a] x y) + // match: (TruncWithPrecisionFloat32x16 [a] x) + // result: (VRNDSCALEPS512 [a+3] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDD512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPS512) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncWithPrecisionFloat32x4(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt32x4 [a] x y) - // result: (VPSHRDD128 [a] x y) + // match: (TruncWithPrecisionFloat32x4 [a] x) + // result: (VRNDSCALEPS128 [a+3] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPS128) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt32x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncWithPrecisionFloat32x8(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt32x8 [a] x y) - // result: (VPSHRDD256 [a] x y) + // match: (TruncWithPrecisionFloat32x8 [a] x) + // result: (VRNDSCALEPS256 [a+3] x) for { a := 
auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDD256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPS256) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x2(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncWithPrecisionFloat64x2(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt64x2 [a] x y) - // result: (VPSHRDQ128 [a] x y) + // match: (TruncWithPrecisionFloat64x2 [a] x) + // result: (VRNDSCALEPD128 [a+3] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPD128) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x4(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt64x4 [a] x y) - // result: (VPSHRDQ256 [a] x y) + // match: (TruncWithPrecisionFloat64x4 [a] x) + // result: (VRNDSCALEPD256 [a+3] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDQ256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPD256) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromInt64x8(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v *Value) bool { v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromInt64x8 [a] x y) - // result: (VPSHRDQ512 [a] x y) + // match: (TruncWithPrecisionFloat64x8 [a] x) + // result: (VRNDSCALEPD512 [a+3] x) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDQ512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + v.reset(OpAMD64VRNDSCALEPD512) + v.AuxInt = int8ToAuxInt(a + 3) + v.AddArg(x) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x16(v *Value) bool { +func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint16x16 [a] x y) - // result: (VPSHRDW256 [a] x y) + b := v.Block + // match: (TruncWithPrecisionMaskedFloat32x16 [a] x mask) + // result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDW256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x32(v *Value) bool { +func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint16x32 [a] x y) - // result: (VPSHRDW512 [a] x y) + b := v.Block + // match: (TruncWithPrecisionMaskedFloat32x4 [a] x mask) + // result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDW512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked128) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func 
rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint16x8(v *Value) bool { +func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint16x8 [a] x y) - // result: (VPSHRDW128 [a] x y) + b := v.Block + // match: (TruncWithPrecisionMaskedFloat32x8 [a] x mask) + // result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDW128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked256) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x16(v *Value) bool { +func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint32x16 [a] x y) - // result: (VPSHRDD512 [a] x y) + b := v.Block + // match: (TruncWithPrecisionMaskedFloat64x2 [a] x mask) + // result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDD512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked128) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x4(v *Value) bool { +func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint32x4 [a] x y) - // result: (VPSHRDD128 [a] x y) + b := v.Block + // match: (TruncWithPrecisionMaskedFloat64x4 [a] x mask) + // result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDD128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked256) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint32x8(v *Value) bool { +func rewriteValueAMD64_OpTruncWithPrecisionMaskedFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint32x8 [a] x y) - // result: (VPSHRDD256 [a] x y) + b := v.Block + // match: (TruncWithPrecisionMaskedFloat64x8 [a] x mask) + // result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) x := v_0 - y := v_1 - v.reset(OpAMD64VPSHRDD256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = int8ToAuxInt(a + 3) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x2(v *Value) bool { +func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint64x2 [a] x y) - // result: (VPSHRDQ128 [a] x y) + b := v.Block + // match: (UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) + // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) for { - a := 
auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHRDQ128) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x4(v *Value) bool { +func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint64x4 [a] x y) - // result: (VPSHRDQ256 [a] x y) + b := v.Block + // match: (UnsignedSignedQuadDotProdAccumulateMaskedInt32x4 x y z mask) + // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHRDQ256) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromUint64x8(v *Value) bool { +func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllRightAndFillUpperFromUint64x8 [a] x y) - // result: (VPSHRDQ512 [a] x y) + b := v.Block + // match: (UnsignedSignedQuadDotProdAccumulateMaskedInt32x8 x y z mask) + // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - v.reset(OpAMD64VPSHRDQ512) - v.AuxInt = int8ToAuxInt(a) - v.AddArg2(x, y) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpSlicemask(v *Value) bool { +func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (Slicemask x) - // result: (SARQconst (NEGQ x) [63]) + // match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x16 x y z mask) + // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) for { - t := v.Type x := v_0 - v.reset(OpAMD64SARQconst) - v.AuxInt = int8ToAuxInt(63) - v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t) - v0.AddArg(x) - v.AddArg(v0) + y := v_1 + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpSpectreIndex(v *Value) bool { +func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (SpectreIndex x y) - // result: (CMOVQCC x (MOVQconst [0]) (CMPQ x y)) + // match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x4 x y z mask) + // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64CMOVQCC) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v1.AddArg2(x, y) - v.AddArg3(x, v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool { +func rewriteValueAMD64_OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - typ := &b.Func.Config.Types - // match: (SpectreSliceIndex x y) - // result: (CMOVQHI x (MOVQconst [0]) (CMPQ x y)) + // match: (UnsignedSignedQuadDotProdAccumulateMaskedUint32x8 x y z mask) + // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64CMOVQHI) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags) - v1.AddArg2(x, y) - v.AddArg3(x, v0, v1) + z := v_2 + mask := v_3 + v.reset(OpAMD64VPDPBUSDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(x, y, z, v0) return true } } -func rewriteValueAMD64_OpStore(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Store {t} ptr val mem) - // cond: t.Size() == 8 && t.IsFloat() - // result: (MOVSDstore ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 8 && t.IsFloat()) { - break - } - v.reset(OpAMD64MOVSDstore) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 4 && t.IsFloat() - // result: (MOVSSstore ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 4 && t.IsFloat()) { - break - } - v.reset(OpAMD64MOVSSstore) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 8 && !t.IsFloat() - // result: (MOVQstore ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 8 && !t.IsFloat()) { - break - } - v.reset(OpAMD64MOVQstore) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 4 && !t.IsFloat() - // result: (MOVLstore ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 4 && !t.IsFloat()) { - break - } - v.reset(OpAMD64MOVLstore) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 2 - // result: (MOVWstore ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 2) { - break - } - v.reset(OpAMD64MOVWstore) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 1 - // result: (MOVBstore ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 1) { - break - } - v.reset(OpAMD64MOVBstore) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 16 - // result: (VMOVDQUstore128 ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 16) { - break - } - v.reset(OpAMD64VMOVDQUstore128) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store {t} ptr val mem) - // cond: t.Size() == 32 - // result: (VMOVDQUstore256 ptr val mem) - for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 32) { - break - } - v.reset(OpAMD64VMOVDQUstore256) - v.AddArg3(ptr, val, mem) - return true - } - // match: (Store 
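
Note: the UnsignedSignedQuadDotProdAccumulateMasked* rules above select VPDPBUSD with a converted mask operand. A scalar sketch of one lane of that instruction:

    package main

    import "fmt"

    // dpbusd is a scalar model of one 32-bit lane of VPDPBUSD: each unsigned
    // byte of x is multiplied with the corresponding signed byte of y and the
    // four products are accumulated into z (without saturation; the saturating
    // variant is VPDPBUSDS).
    func dpbusd(z int32, x [4]uint8, y [4]int8) int32 {
    	for i := 0; i < 4; i++ {
    		z += int32(x[i]) * int32(y[i])
    	}
    	return z
    }

    func main() {
    	fmt.Println(dpbusd(10, [4]uint8{1, 2, 3, 4}, [4]int8{1, -1, 1, -1})) // 8
    }
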
{t} ptr val mem) - // cond: t.Size() == 64 - // result: (VMOVDQUstore512 ptr val mem) + b := v.Block + // match: (XorMaskedInt32x16 x y mask) + // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) for { - t := auxToType(v.Aux) - ptr := v_0 - val := v_1 - mem := v_2 - if !(t.Size() == 64) { - break - } - v.reset(OpAMD64VMOVDQUstore512) - v.AddArg3(ptr, val, mem) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } - return false } -func rewriteValueAMD64_OpTrunc(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (Trunc x) - // result: (ROUNDSD [3] x) + b := v.Block + // match: (XorMaskedInt32x4 x y mask) + // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 - v.reset(OpAMD64ROUNDSD) - v.AuxInt = int8ToAuxInt(3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncFloat32x4 x) - // result: (VROUNDPS128 [3] x) + b := v.Block + // match: (XorMaskedInt32x8 x y mask) + // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPS128) - v.AuxInt = int8ToAuxInt(3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedInt64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncFloat32x8 x) - // result: (VROUNDPS256 [3] x) + b := v.Block + // match: (XorMaskedInt64x2 x y mask) + // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPS256) - v.AuxInt = int8ToAuxInt(3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedInt64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncFloat64x2 x) - // result: (VROUNDPD128 [3] x) + b := v.Block + // match: (XorMaskedInt64x4 x y mask) + // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPD128) - v.AuxInt = int8ToAuxInt(3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedInt64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncFloat64x4 x) - // result: (VROUNDPD256 [3] x) + b := v.Block + // match: (XorMaskedInt64x8 x y mask) + // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 - v.reset(OpAMD64VROUNDPD256) - v.AuxInt = int8ToAuxInt(3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORQMasked512) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncWithPrecisionFloat32x16(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncWithPrecisionFloat32x16 [a] x) - // result: (VRNDSCALEPS512 [a+3] x) + b := v.Block + // match: (XorMaskedUint32x16 x y mask) + // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS512) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncWithPrecisionFloat32x4(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncWithPrecisionFloat32x4 [a] x) - // result: (VRNDSCALEPS128 [a+3] x) + b := v.Block + // match: (XorMaskedUint32x4 x y mask) + // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS128) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncWithPrecisionFloat32x8(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncWithPrecisionFloat32x8 [a] x) - // result: (VRNDSCALEPS256 [a+3] x) + b := v.Block + // match: (XorMaskedUint32x8 x y mask) + // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPS256) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncWithPrecisionFloat64x2(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncWithPrecisionFloat64x2 [a] x) - // result: (VRNDSCALEPD128 [a+3] x) + b := v.Block + // match: (XorMaskedUint64x2 x y mask) + // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD128) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORQMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedUint64x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncWithPrecisionFloat64x4 [a] x) - // result: (VRNDSCALEPD256 [a+3] x) + b := v.Block + // match: (XorMaskedUint64x4 x y mask) + // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD256) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORQMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, 
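
Note: the XorMasked* rules above all follow the same shape: the vector-shaped mask is converted to a K register via VPMOVVec*ToM and appended as the last operand of the masked instruction. A scalar sketch of the per-lane behaviour, with the masking assumption stated in the comment:

    package main

    import "fmt"

    // xorMasked is a scalar model of the XorMasked* rules: active lanes get
    // x^y. Whether inactive lanes are zeroed or keep x is not visible from the
    // rules alone; zeroing (AVX-512 "{z}" behaviour) is assumed here.
    func xorMasked(x, y, mask []uint32) []uint32 {
    	out := make([]uint32, len(x))
    	for i := range x {
    		if mask[i] != 0 {
    			out[i] = x[i] ^ y[i]
    		}
    	}
    	return out
    }

    func main() {
    	x := []uint32{0xF0, 0x0F, 0xFF, 0x00}
    	y := []uint32{0xFF, 0xFF, 0xFF, 0xFF}
    	m := []uint32{^uint32(0), 0, ^uint32(0), 0}
    	fmt.Println(xorMasked(x, y, m)) // [15 0 0 0]
    }
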
v0) return true } } -func rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v *Value) bool { +func rewriteValueAMD64_OpXorMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (TruncWithPrecisionFloat64x8 [a] x) - // result: (VRNDSCALEPD512 [a+3] x) + b := v.Block + // match: (XorMaskedUint64x8 x y mask) + // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM mask)) for { - a := auxIntToInt8(v.AuxInt) x := v_0 - v.reset(OpAMD64VRNDSCALEPD512) - v.AuxInt = int8ToAuxInt(a + 3) - v.AddArg(x) + y := v_1 + mask := v_2 + v.reset(OpAMD64VPXORQMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) return true } } diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index a476e66845..c6e8961738 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -23,6 +23,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int64x2.Absolute", opLen1(ssa.OpAbsoluteInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.Absolute", opLen1(ssa.OpAbsoluteInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.Absolute", opLen1(ssa.OpAbsoluteInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.AbsoluteMasked", opLen2(ssa.OpAbsoluteMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64) @@ -53,6 +65,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.Add", opLen2(ssa.OpAddUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.AddMasked", opLen3(ssa.OpAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.AddMasked", opLen3(ssa.OpAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.AddMasked", opLen3(ssa.OpAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.AddMasked", opLen3(ssa.OpAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.AddMasked", opLen3(ssa.OpAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.AddMasked", opLen3(ssa.OpAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.AddMasked", opLen3(ssa.OpAddMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.AddMasked", opLen3(ssa.OpAddMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.AddMasked", opLen3(ssa.OpAddMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.AddMasked", opLen3(ssa.OpAddMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.AddMasked", opLen3(ssa.OpAddMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.AddMasked", opLen3(ssa.OpAddMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.AddMasked", opLen3(ssa.OpAddMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.AddMasked", opLen3(ssa.OpAddMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.AddMasked", opLen3(ssa.OpAddMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.AddMasked", opLen3(ssa.OpAddMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.AddMasked", opLen3(ssa.OpAddMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.AddMasked", opLen3(ssa.OpAddMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.AddMasked", opLen3(ssa.OpAddMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.AddMasked", opLen3(ssa.OpAddMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.AddMasked", opLen3(ssa.OpAddMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.AddMasked", opLen3(ssa.OpAddMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.AddMasked", opLen3(ssa.OpAddMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.AddMasked", opLen3(ssa.OpAddMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.AddMasked", opLen3(ssa.OpAddMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.AddMasked", opLen3(ssa.OpAddMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.AddMasked", opLen3(ssa.OpAddMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.AddMasked", opLen3(ssa.OpAddMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.AddMasked", opLen3(ssa.OpAddMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.AddMasked", opLen3(ssa.OpAddMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.AddSub", 
opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64) @@ -77,6 +119,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.And", opLen2(ssa.OpAndUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.And", opLen2(ssa.OpAndUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.And", opLen2(ssa.OpAndUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.AndMasked", opLen3(ssa.OpAndMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.AndMasked", opLen3(ssa.OpAndMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.AndMasked", opLen3(ssa.OpAndMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.AndMasked", opLen3(ssa.OpAndMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.AndMasked", opLen3(ssa.OpAndMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.AndMasked", opLen3(ssa.OpAndMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.AndMasked", opLen3(ssa.OpAndMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.AndMasked", opLen3(ssa.OpAndMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.AndMasked", opLen3(ssa.OpAndMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.AndMasked", opLen3(ssa.OpAndMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.AndMasked", opLen3(ssa.OpAndMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.AndMasked", opLen3(ssa.OpAndMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.AndNot", opLen2(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AndNot", opLen2(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.AndNot", opLen2(ssa.OpAndNotInt16x8, types.TypeVec128), sys.AMD64) @@ -97,24 +151,54 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.AndNot", opLen2(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.AndNot", opLen2(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.AndNot", opLen2(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.AndNotMasked", opLen3(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.ApproximateReciprocalMasked", opLen2(ssa.OpApproximateReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, 
"Float32x16.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrtMasked", opLen2(ssa.OpApproximateReciprocalOfSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64) @@ -125,36 +209,72 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float64x2.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.CeilWithPrecisionMasked", opLen2Imm8(ssa.OpCeilWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithCeilWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithCeilWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithFloorWithPrecision", 
opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithFloorWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithFloorWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithRoundWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithRoundWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float32x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", 
opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float64x2.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float64x2.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.DiffWithTruncWithPrecisionMasked", opLen2Imm8(ssa.OpDiffWithTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Div", opLen2(ssa.OpDivFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Div", opLen2(ssa.OpDivFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.DivMasked", opLen3(ssa.OpDivMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.DivMasked", opLen3(ssa.OpDivMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.DivMasked", opLen3(ssa.OpDivMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.DivMasked", opLen3(ssa.OpDivMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.DivMasked", opLen3(ssa.OpDivMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.DivMasked", opLen3(ssa.OpDivMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64) @@ -186,6 +306,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
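
Note: a scalar reading of the DiffWithTruncWithPrecision* registrations above, under the assumptions stated in the comment:

    package main

    import (
    	"fmt"
    	"math"
    )

    // diffWithTrunc models DiffWithTruncWithPrecision as "the part of x that
    // truncation to m fraction bits discards": x - trunc(x*2^m)/2^m. That
    // these ops lower to VREDUCEPS/PD is an assumption; this part of the diff
    // only names the generic ops.
    func diffWithTrunc(x float64, m uint) float64 {
    	scale := math.Ldexp(1, int(m)) // 2^m
    	return x - math.Trunc(x*scale)/scale
    }

    func main() {
    	fmt.Println(diffWithTrunc(3.14159, 3)) // ≈ 0.01659 (3.14159 - 3.125)
    }
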
addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, 
"Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64) @@ -196,33 +346,66 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float64x2.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.FloorWithPrecisionMasked", opLen2Imm8(ssa.OpFloorWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Float32x4.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.FusedMultiplyAdd", opLen3(ssa.OpFusedMultiplyAddFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.FusedMultiplyAddMasked", opLen4(ssa.OpFusedMultiplyAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.FusedMultiplyAddSub", 
opLen3(ssa.OpFusedMultiplyAddSubFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.FusedMultiplyAddSub", opLen3(ssa.OpFusedMultiplyAddSubFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.FusedMultiplyAddSubMasked", opLen4(ssa.OpFusedMultiplyAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.FusedMultiplySubAddMasked", opLen4(ssa.OpFusedMultiplySubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInversed", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInversed", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInversed", 
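
Note: per element, FusedMultiplyAdd is x*y + z with a single rounding; the *Masked registrations above only add a mask operand (opLen4 instead of opLen3). Scalar Go exposes the same single-rounding operation as math.FMA, which shows the difference from a separate multiply and add:

    package main

    import (
    	"fmt"
    	"math"
    )

    func main() {
    	x := 0.1
    	fmt.Println(x*10 - 1)            // 0: the product is rounded to 1 before the subtract
    	fmt.Println(math.FMA(x, 10, -1)) // ≈5.55e-17: one rounding at the end
    }
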
opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInversedMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInversedMaskedUint8x16, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInversedMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInversedMaskedUint8x32, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInversedMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInversedMaskedUint8x64, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x16, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x32, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x64, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x8.Get128", opLen1Imm8(ssa.OpGet128Float32x8, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Float64x4.Get128", opLen1Imm8(ssa.OpGet128Float64x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int8x32.Get128", opLen1Imm8(ssa.OpGet128Int8x32, types.TypeVec128, 0), sys.AMD64) @@ -301,12 +484,78 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
 	addF(simdPackage, "Uint64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Uint64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float32x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float32x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float32x16.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float64x2.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float64x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float64x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64)
@@ -367,771 +616,66 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.LessEqual", opLen2(ssa.OpLessEqualUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.LessEqual", opLen2(ssa.OpLessEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedAdd", opLen3(ssa.OpMaskedAddInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedAdd", opLen3(ssa.OpMaskedAddInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedAdd", opLen3(ssa.OpMaskedAddInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, 
"Uint8x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x2, types.TypeVec128), 
sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, 
"Float32x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedDiffWithTruncWithPrecision", 
opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedEqual", 
opLen3(ssa.OpMaskedEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedEqual", opLen3(ssa.OpMaskedEqualUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedEqual", opLen3(ssa.OpMaskedEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedEqual", opLen3(ssa.OpMaskedEqualUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedEqual", opLen3(ssa.OpMaskedEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedFusedMultiplyAdd", opLen4(ssa.OpMaskedFusedMultiplyAddFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedFusedMultiplyAdd", opLen4(ssa.OpMaskedFusedMultiplyAddFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedFusedMultiplyAdd", opLen4(ssa.OpMaskedFusedMultiplyAddFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedFusedMultiplyAdd", opLen4(ssa.OpMaskedFusedMultiplyAddFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedFusedMultiplyAdd", opLen4(ssa.OpMaskedFusedMultiplyAddFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedFusedMultiplyAdd", opLen4(ssa.OpMaskedFusedMultiplyAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedFusedMultiplyAddSub", opLen4(ssa.OpMaskedFusedMultiplyAddSubFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedFusedMultiplyAddSub", opLen4(ssa.OpMaskedFusedMultiplyAddSubFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedFusedMultiplyAddSub", opLen4(ssa.OpMaskedFusedMultiplyAddSubFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedFusedMultiplyAddSub", opLen4(ssa.OpMaskedFusedMultiplyAddSubFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedFusedMultiplyAddSub", opLen4(ssa.OpMaskedFusedMultiplyAddSubFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedFusedMultiplyAddSub", opLen4(ssa.OpMaskedFusedMultiplyAddSubFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, 
"Float64x2.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedFusedMultiplySubAdd", opLen4(ssa.OpMaskedFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransform", opLen3Imm8_2I(ssa.OpMaskedGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransform", opLen3Imm8_2I(ssa.OpMaskedGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransform", opLen3Imm8_2I(ssa.OpMaskedGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8_2I(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x16, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8_2I(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x32, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedGaloisFieldAffineTransformInversed", opLen3Imm8_2I(ssa.OpMaskedGaloisFieldAffineTransformInversedUint8x64, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedGaloisFieldMul", opLen3(ssa.OpMaskedGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x2, 
types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x2, types.TypeVec128), sys.AMD64) - 
addF(simdPackage, "Int64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedLess", opLen3(ssa.OpMaskedLessInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedLess", opLen3(ssa.OpMaskedLessInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedLess", opLen3(ssa.OpMaskedLessInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedLess", opLen3(ssa.OpMaskedLessInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedLess", opLen3(ssa.OpMaskedLessInt16x16, types.TypeVec256), sys.AMD64) - 
addF(simdPackage, "Int16x32.MaskedLess", opLen3(ssa.OpMaskedLessInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedLess", opLen3(ssa.OpMaskedLessInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedLess", opLen3(ssa.OpMaskedLessInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedLess", opLen3(ssa.OpMaskedLessInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedLess", opLen3(ssa.OpMaskedLessInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedLess", opLen3(ssa.OpMaskedLessInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedLess", opLen3(ssa.OpMaskedLessInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedLess", opLen3(ssa.OpMaskedLessUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedLess", opLen3(ssa.OpMaskedLessUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedLess", opLen3(ssa.OpMaskedLessUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedLess", opLen3(ssa.OpMaskedLessUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedLess", opLen3(ssa.OpMaskedLessUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedLess", opLen3(ssa.OpMaskedLessUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedLess", opLen3(ssa.OpMaskedLessUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedLess", opLen3(ssa.OpMaskedLessUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedLess", opLen3(ssa.OpMaskedLessUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedLess", opLen3(ssa.OpMaskedLessUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedLess", opLen3(ssa.OpMaskedLessUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedLess", opLen3(ssa.OpMaskedLessUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, 
"Int32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedMax", 
opLen3(ssa.OpMaskedMaxInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedMax", opLen3(ssa.OpMaskedMaxInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedMin", opLen3(ssa.OpMaskedMinInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedMin", opLen3(ssa.OpMaskedMinInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedMin", opLen3(ssa.OpMaskedMinInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedMin", opLen3(ssa.OpMaskedMinInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedMin", opLen3(ssa.OpMaskedMinInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedMin", opLen3(ssa.OpMaskedMinInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedMin", opLen3(ssa.OpMaskedMinInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedMin", opLen3(ssa.OpMaskedMinInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedMin", opLen3(ssa.OpMaskedMinInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedMin", opLen3(ssa.OpMaskedMinInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedMin", opLen3(ssa.OpMaskedMinInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedMin", opLen3(ssa.OpMaskedMinInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedMin", opLen3(ssa.OpMaskedMinUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedMin", 
opLen3(ssa.OpMaskedMinUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedMin", opLen3(ssa.OpMaskedMinUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedMin", opLen3(ssa.OpMaskedMinUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedMin", opLen3(ssa.OpMaskedMinUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedMin", opLen3(ssa.OpMaskedMinUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedMin", opLen3(ssa.OpMaskedMinUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedMin", opLen3(ssa.OpMaskedMinUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedMin", opLen3(ssa.OpMaskedMinUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedMin", opLen3(ssa.OpMaskedMinUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedMin", opLen3(ssa.OpMaskedMinUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedMin", opLen3(ssa.OpMaskedMinUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x32, types.TypeVec512), sys.AMD64) - 
addF(simdPackage, "Uint16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedNotEqual", 
opLen3(ssa.OpMaskedNotEqualUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedOr", opLen3(ssa.OpMaskedOrInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedOr", opLen3(ssa.OpMaskedOrInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedOr", opLen3(ssa.OpMaskedOrInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedOr", opLen3(ssa.OpMaskedOrInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedOr", opLen3(ssa.OpMaskedOrInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedOr", opLen3(ssa.OpMaskedOrInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedOr", opLen3(ssa.OpMaskedOrUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedOr", opLen3(ssa.OpMaskedOrUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedOr", opLen3(ssa.OpMaskedOrUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedOr", opLen3(ssa.OpMaskedOrUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedOr", opLen3(ssa.OpMaskedOrUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedOr", opLen3(ssa.OpMaskedOrUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x64, types.TypeVec512), sys.AMD64) - 
addF(simdPackage, "Int16x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint32x16, types.TypeVec512, 0), 
sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedRotateAllLeft", opLen2Imm8(ssa.OpMaskedRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedRotateAllRight", opLen2Imm8(ssa.OpMaskedRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedRotateLeft", opLen3(ssa.OpMaskedRotateLeftUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, 
"Int32x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedRotateRight", opLen3(ssa.OpMaskedRotateRightUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedSaturatedAdd", 
opLen3(ssa.OpMaskedSaturatedAddUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", 
opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftAllLeft", opLen3(ssa.OpMaskedShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x4, types.TypeVec256, 0), 
sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftAllLeftAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllLeftAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftAllRight", opLen3(ssa.OpMaskedShiftAllRightUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, 
"Uint64x4.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftAllRightAndFillUpperFrom", opLen3Imm8(ssa.OpMaskedShiftAllRightAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftAllRightSignExtended", opLen3(ssa.OpMaskedShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftLeft", opLen3(ssa.OpMaskedShiftLeftUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x8, types.TypeVec256), 
sys.AMD64) - addF(simdPackage, "Int32x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftLeftAndFillUpperFrom", opLen4(ssa.OpMaskedShiftLeftAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x8, 
types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftRight", opLen3(ssa.OpMaskedShiftRightUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftRightAndFillUpperFrom", opLen4(ssa.OpMaskedShiftRightAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedShiftRightSignExtended", 
opLen3(ssa.OpMaskedShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedShiftRightSignExtended", opLen3(ssa.OpMaskedShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.MaskedSub", 
opLen3(ssa.OpMaskedSubInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.MaskedSub", opLen3(ssa.OpMaskedSubInt8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x64.MaskedSub", opLen3(ssa.OpMaskedSubInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.MaskedSub", opLen3(ssa.OpMaskedSubInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.MaskedSub", opLen3(ssa.OpMaskedSubInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.MaskedSub", opLen3(ssa.OpMaskedSubInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedSub", opLen3(ssa.OpMaskedSubInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedSub", opLen3(ssa.OpMaskedSubInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.MaskedSub", opLen3(ssa.OpMaskedSubInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.MaskedSub", opLen3(ssa.OpMaskedSubInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.MaskedSub", opLen3(ssa.OpMaskedSubInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.MaskedSub", opLen3(ssa.OpMaskedSubInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.MaskedSub", opLen3(ssa.OpMaskedSubUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.MaskedSub", opLen3(ssa.OpMaskedSubUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x64.MaskedSub", opLen3(ssa.OpMaskedSubUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.MaskedSub", opLen3(ssa.OpMaskedSubUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.MaskedSub", opLen3(ssa.OpMaskedSubUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.MaskedSub", opLen3(ssa.OpMaskedSubUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.MaskedSub", opLen3(ssa.OpMaskedSubUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.MaskedSub", opLen3(ssa.OpMaskedSubUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.MaskedSub", opLen3(ssa.OpMaskedSubUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.MaskedSub", opLen3(ssa.OpMaskedSubUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.MaskedSub", opLen3(ssa.OpMaskedSubUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.MaskedSub", opLen3(ssa.OpMaskedSubUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float32x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float32x16.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Float64x2.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) - addF(simdPackage, "Float64x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) - addF(simdPackage, "Float64x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) - addF(simdPackage, "Int32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.MaskedUnsignedSignedQuadDotProdAccumulate", 
opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int32x4.MaskedXor", opLen3(ssa.OpMaskedXorInt32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int32x8.MaskedXor", opLen3(ssa.OpMaskedXorInt32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int32x16.MaskedXor", opLen3(ssa.OpMaskedXorInt32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Int64x2.MaskedXor", opLen3(ssa.OpMaskedXorInt64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Int64x4.MaskedXor", opLen3(ssa.OpMaskedXorInt64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Int64x8.MaskedXor", opLen3(ssa.OpMaskedXorInt64x8, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint32x4.MaskedXor", opLen3(ssa.OpMaskedXorUint32x4, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint32x8.MaskedXor", opLen3(ssa.OpMaskedXorUint32x8, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint32x16.MaskedXor", opLen3(ssa.OpMaskedXorUint32x16, types.TypeVec512), sys.AMD64)
-	addF(simdPackage, "Uint64x2.MaskedXor", opLen3(ssa.OpMaskedXorUint64x2, types.TypeVec128), sys.AMD64)
-	addF(simdPackage, "Uint64x4.MaskedXor", opLen3(ssa.OpMaskedXorUint64x4, types.TypeVec256), sys.AMD64)
-	addF(simdPackage, "Uint64x8.MaskedXor", opLen3(ssa.OpMaskedXorUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float32x4.LessMasked", opLen3(ssa.OpLessMaskedFloat32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float32x8.LessMasked", opLen3(ssa.OpLessMaskedFloat32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float32x16.LessMasked", opLen3(ssa.OpLessMaskedFloat32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float64x2.LessMasked", opLen3(ssa.OpLessMaskedFloat64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float64x4.LessMasked", opLen3(ssa.OpLessMaskedFloat64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float64x8.LessMasked", opLen3(ssa.OpLessMaskedFloat64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int8x16.LessMasked", opLen3(ssa.OpLessMaskedInt8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int8x32.LessMasked", opLen3(ssa.OpLessMaskedInt8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int8x64.LessMasked", opLen3(ssa.OpLessMaskedInt8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int16x8.LessMasked", opLen3(ssa.OpLessMaskedInt16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int16x16.LessMasked", opLen3(ssa.OpLessMaskedInt16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int16x32.LessMasked", opLen3(ssa.OpLessMaskedInt16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int32x4.LessMasked", opLen3(ssa.OpLessMaskedInt32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int32x8.LessMasked", opLen3(ssa.OpLessMaskedInt32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int32x16.LessMasked", opLen3(ssa.OpLessMaskedInt32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Int64x2.LessMasked", opLen3(ssa.OpLessMaskedInt64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Int64x4.LessMasked", opLen3(ssa.OpLessMaskedInt64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Int64x8.LessMasked", opLen3(ssa.OpLessMaskedInt64x8, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint8x16.LessMasked", opLen3(ssa.OpLessMaskedUint8x16, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint8x32.LessMasked", opLen3(ssa.OpLessMaskedUint8x32, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint8x64.LessMasked", opLen3(ssa.OpLessMaskedUint8x64, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint16x8.LessMasked", opLen3(ssa.OpLessMaskedUint16x8, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint16x16.LessMasked", opLen3(ssa.OpLessMaskedUint16x16, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint16x32.LessMasked", opLen3(ssa.OpLessMaskedUint16x32, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint32x4.LessMasked", opLen3(ssa.OpLessMaskedUint32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint32x8.LessMasked", opLen3(ssa.OpLessMaskedUint32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint32x16.LessMasked", opLen3(ssa.OpLessMaskedUint32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Uint64x2.LessMasked", opLen3(ssa.OpLessMaskedUint64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Uint64x4.LessMasked", opLen3(ssa.OpLessMaskedUint64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Uint64x8.LessMasked", opLen3(ssa.OpLessMaskedUint64x8, types.TypeVec512), sys.AMD64)
 	addF(simdPackage, "Float32x4.Max", opLen2(ssa.OpMaxFloat32x4, types.TypeVec128), sys.AMD64)
 	addF(simdPackage, "Float32x8.Max", opLen2(ssa.OpMaxFloat32x8, types.TypeVec256), sys.AMD64)
 	addF(simdPackage, "Float32x16.Max", opLen2(ssa.OpMaxFloat32x16, types.TypeVec512), sys.AMD64)
@@ -1162,6 +706,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.Max", opLen2(ssa.OpMaxUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Max", opLen2(ssa.OpMaxUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Max", opLen2(ssa.OpMaxUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Min", opLen2(ssa.OpMinFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Min", opLen2(ssa.OpMinFloat32x8, 
types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Min", opLen2(ssa.OpMinFloat32x16, types.TypeVec512), sys.AMD64) @@ -1192,6 +766,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Min", opLen2(ssa.OpMinUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Min", opLen2(ssa.OpMinUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Min", opLen2(ssa.OpMinUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.MinMasked", opLen3(ssa.OpMinMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.MinMasked", opLen3(ssa.OpMinMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.MinMasked", opLen3(ssa.OpMinMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.MinMasked", opLen3(ssa.OpMinMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.MinMasked", opLen3(ssa.OpMinMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.MinMasked", opLen3(ssa.OpMinMaskedFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.MinMasked", opLen3(ssa.OpMinMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.MinMasked", opLen3(ssa.OpMinMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.MinMasked", opLen3(ssa.OpMinMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MinMasked", opLen3(ssa.OpMinMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MinMasked", opLen3(ssa.OpMinMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MinMasked", opLen3(ssa.OpMinMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MinMasked", opLen3(ssa.OpMinMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MinMasked", opLen3(ssa.OpMinMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MinMasked", opLen3(ssa.OpMinMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MinMasked", opLen3(ssa.OpMinMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MinMasked", opLen3(ssa.OpMinMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MinMasked", opLen3(ssa.OpMinMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.MinMasked", opLen3(ssa.OpMinMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.MinMasked", opLen3(ssa.OpMinMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.MinMasked", opLen3(ssa.OpMinMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MinMasked", opLen3(ssa.OpMinMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MinMasked", opLen3(ssa.OpMinMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MinMasked", opLen3(ssa.OpMinMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.MinMasked", opLen3(ssa.OpMinMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.MinMasked", opLen3(ssa.OpMinMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.MinMasked", opLen3(ssa.OpMinMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MinMasked", opLen3(ssa.OpMinMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MinMasked", opLen3(ssa.OpMinMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, 
"Uint64x8.MinMasked", opLen3(ssa.OpMinMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Mul", opLen2(ssa.OpMulFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Mul", opLen2(ssa.OpMulFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Mul", opLen2(ssa.OpMulFloat32x16, types.TypeVec512), sys.AMD64) @@ -1204,6 +808,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.MulByPowOf2Masked", opLen3(ssa.OpMulByPowOf2MaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64) @@ -1214,12 +824,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.MulEvenWidenMasked", opLen3(ssa.OpMulEvenWidenMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.MulHigh", opLen2(ssa.OpMulHighInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.MulHigh", opLen2(ssa.OpMulHighInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.MulLow", opLen2(ssa.OpMulLowInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.MulLow", opLen2(ssa.OpMulLowInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.MulLow", opLen2(ssa.OpMulLowInt16x32, types.TypeVec512), sys.AMD64) @@ -1229,6 +851,21 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int64x2.MulLow", opLen2(ssa.OpMulLowInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.MulLow", opLen2(ssa.OpMulLowInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.MulLow", opLen2(ssa.OpMulLowInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.MulLowMasked", opLen3(ssa.OpMulLowMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.MulMasked", opLen3(ssa.OpMulMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.MulMasked", opLen3(ssa.OpMulMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.MulMasked", opLen3(ssa.OpMulMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64) @@ -1259,6 +896,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.NotEqual", opLen2(ssa.OpNotEqualUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.NotEqual", opLen2(ssa.OpNotEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.NotEqualMasked", 
opLen3(ssa.OpNotEqualMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.Or", opLen2(ssa.OpOrInt16x8, types.TypeVec128), sys.AMD64) @@ -1279,12 +946,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.OrMasked", opLen3(ssa.OpOrMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.OrMasked", opLen3(ssa.OpOrMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.OrMasked", opLen3(ssa.OpOrMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.OrMasked", opLen3(ssa.OpOrMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.OrMasked", opLen3(ssa.OpOrMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.OrMasked", opLen3(ssa.OpOrMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.OrMasked", opLen3(ssa.OpOrMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.OrMasked", opLen3(ssa.OpOrMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.OrMasked", opLen3(ssa.OpOrMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.OrMasked", opLen3(ssa.OpOrMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.OrMasked", opLen3(ssa.OpOrMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.OrMasked", opLen3(ssa.OpOrMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.PairDotProdAccumulateMasked", opLen4(ssa.OpPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.PairDotProdMasked", opLen3(ssa.OpPairDotProdMaskedInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.PairwiseAdd", 
opLen2(ssa.OpPairwiseAddFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x2, types.TypeVec128), sys.AMD64) @@ -1333,6 +1018,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.PopCount", opLen1(ssa.OpPopCountUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.PopCount", opLen1(ssa.OpPopCountUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.PopCount", opLen1(ssa.OpPopCountUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.PopCountMasked", opLen2(ssa.OpPopCountMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, 
"Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64) @@ -1345,6 +1054,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64) @@ -1357,6 +1078,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int32x4.RotateLeft", opLen2(ssa.OpRotateLeftInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.RotateLeft", opLen2(ssa.OpRotateLeftInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.RotateLeft", opLen2(ssa.OpRotateLeftInt32x16, types.TypeVec512), sys.AMD64) @@ -1369,6 +1102,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateLeft", opLen2(ssa.OpRotateLeftUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.RotateLeft", opLen2(ssa.OpRotateLeftUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.RotateLeft", opLen2(ssa.OpRotateLeftUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.RotateRight", opLen2(ssa.OpRotateRightInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.RotateRight", opLen2(ssa.OpRotateRightInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.RotateRight", opLen2(ssa.OpRotateRightInt32x16, types.TypeVec512), sys.AMD64) @@ -1381,6 +1126,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.RotateRight", opLen2(ssa.OpRotateRightUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.RotateRight", opLen2(ssa.OpRotateRightUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.RotateRight", opLen2(ssa.OpRotateRightUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64) @@ -1391,6 +1148,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float64x2.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.RoundWithPrecisionMasked", opLen2Imm8(ssa.OpRoundWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Int8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x64, types.TypeVec512), sys.AMD64) @@ -1403,9 +1166,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.SaturatedAddMasked", opLen3(ssa.OpSaturatedAddMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulate", 
opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulateMasked", opLen4(ssa.OpSaturatedPairDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64) @@ -1422,15 +1200,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.SaturatedSubMasked", opLen3(ssa.OpSaturatedSubMaskedUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x64, types.TypeVec512), sys.AMD64) 
+ addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.SaturatedUnsignedSignedPairDotProdMasked", opLen3(ssa.OpSaturatedUnsignedSignedPairDotProdMaskedUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64) @@ -1481,6 +1280,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x4, 
types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x4, types.TypeVec128), sys.AMD64) @@ -1513,6 +1336,30 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, 
"Uint32x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) @@ -1520,6 +1367,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int64x2.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64) @@ -1556,6 +1406,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, 
"Int32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftRight", opLen2(ssa.OpShiftRightInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftRight", opLen2(ssa.OpShiftRightInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftRight", opLen2(ssa.OpShiftRightInt16x32, types.TypeVec512), sys.AMD64) @@ -1592,6 +1478,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x4, 
types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64) @@ -1610,6 +1532,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64) @@ -1622,6 +1562,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64) @@ -1652,6 +1598,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Sub", opLen2(ssa.OpSubUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x4.SubMasked", opLen3(ssa.OpSubMaskedFloat32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x8.SubMasked", opLen3(ssa.OpSubMaskedFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.SubMasked", opLen3(ssa.OpSubMaskedFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x2.SubMasked", opLen3(ssa.OpSubMaskedFloat64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x4.SubMasked", opLen3(ssa.OpSubMaskedFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.SubMasked", opLen3(ssa.OpSubMaskedFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.SubMasked", opLen3(ssa.OpSubMaskedInt8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x32.SubMasked", opLen3(ssa.OpSubMaskedInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.SubMasked", opLen3(ssa.OpSubMaskedInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.SubMasked", opLen3(ssa.OpSubMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.SubMasked", opLen3(ssa.OpSubMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.SubMasked", opLen3(ssa.OpSubMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.SubMasked", opLen3(ssa.OpSubMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.SubMasked", opLen3(ssa.OpSubMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.SubMasked", opLen3(ssa.OpSubMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.SubMasked", opLen3(ssa.OpSubMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.SubMasked", opLen3(ssa.OpSubMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.SubMasked", opLen3(ssa.OpSubMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.SubMasked", opLen3(ssa.OpSubMaskedUint8x16, types.TypeVec128), 
sys.AMD64) + addF(simdPackage, "Uint8x32.SubMasked", opLen3(ssa.OpSubMaskedUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.SubMasked", opLen3(ssa.OpSubMaskedUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.SubMasked", opLen3(ssa.OpSubMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.SubMasked", opLen3(ssa.OpSubMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.SubMasked", opLen3(ssa.OpSubMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.SubMasked", opLen3(ssa.OpSubMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.SubMasked", opLen3(ssa.OpSubMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.SubMasked", opLen3(ssa.OpSubMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.SubMasked", opLen3(ssa.OpSubMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.SubMasked", opLen3(ssa.OpSubMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.SubMasked", opLen3(ssa.OpSubMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64) @@ -1662,12 +1638,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64) addF(simdPackage, "Float64x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64) addF(simdPackage, "Float64x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float32x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float32x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float32x16.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64) + addF(simdPackage, "Float64x2.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64) + addF(simdPackage, "Float64x4.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64) + addF(simdPackage, "Float64x8.TruncWithPrecisionMasked", opLen2Imm8(ssa.OpTruncWithPrecisionMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64) addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x8, 
types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulateMasked", opLen4(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.Xor", opLen2(ssa.OpXorInt16x8, types.TypeVec128), sys.AMD64) @@ -1688,6 +1676,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.Xor", opLen2(ssa.OpXorUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Xor", opLen2(ssa.OpXorUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Xor", opLen2(ssa.OpXorUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.XorMasked", opLen3(ssa.OpXorMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.XorMasked", opLen3(ssa.OpXorMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.XorMasked", opLen3(ssa.OpXorMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.XorMasked", opLen3(ssa.OpXorMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.XorMasked", opLen3(ssa.OpXorMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.XorMasked", opLen3(ssa.OpXorMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.XorMasked", opLen3(ssa.OpXorMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.XorMasked", opLen3(ssa.OpXorMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.XorMasked", opLen3(ssa.OpXorMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.XorMasked", opLen3(ssa.OpXorMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.XorMasked", opLen3(ssa.OpXorMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.XorMasked", opLen3(ssa.OpXorMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.AsFloat64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Float32x4.AsInt8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) addF(simdPackage, "Float32x4.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 
fa99bba7bb..26a0d3e9ad 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -66,6 +66,68 @@ func (x Int64x4) Absolute() Int64x4 // Asm: VPABSQ, CPU Feature: AVX512EVEX func (x Int64x8) Absolute() Int64x8 +/* AbsoluteMasked */ + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX512EVEX +func (x Int8x16) AbsoluteMasked(y Mask8x16) Int8x16 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX512EVEX +func (x Int8x32) AbsoluteMasked(y Mask8x32) Int8x32 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX512EVEX +func (x Int8x64) AbsoluteMasked(y Mask8x64) Int8x64 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX512EVEX +func (x Int16x8) AbsoluteMasked(y Mask16x8) Int16x8 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX512EVEX +func (x Int16x16) AbsoluteMasked(y Mask16x16) Int16x16 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX512EVEX +func (x Int16x32) AbsoluteMasked(y Mask16x32) Int16x32 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX512EVEX +func (x Int32x4) AbsoluteMasked(y Mask32x4) Int32x4 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX512EVEX +func (x Int32x8) AbsoluteMasked(y Mask32x8) Int32x8 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX512EVEX +func (x Int32x16) AbsoluteMasked(y Mask32x16) Int32x16 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x2) AbsoluteMasked(y Mask64x2) Int64x2 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x4) AbsoluteMasked(y Mask64x4) Int64x4 + +// Absolute computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512EVEX +func (x Int64x8) AbsoluteMasked(y Mask64x8) Int64x8 + /* Add */ // Add adds corresponding elements of two vectors. @@ -218,6 +280,158 @@ func (x Uint64x4) Add(y Uint64x4) Uint64x4 // Asm: VPADDQ, CPU Feature: AVX512EVEX func (x Uint64x8) Add(y Uint64x8) Uint64x8 +/* AddMasked */ + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX512EVEX +func (x Float32x4) AddMasked(y Float32x4, z Mask32x4) Float32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX512EVEX +func (x Float32x8) AddMasked(y Float32x8, z Mask32x8) Float32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX512EVEX +func (x Float32x16) AddMasked(y Float32x16, z Mask32x16) Float32x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX512EVEX +func (x Float64x2) AddMasked(y Float64x2, z Mask64x2) Float64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX512EVEX +func (x Float64x4) AddMasked(y Float64x4, z Mask64x4) Float64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX512EVEX +func (x Float64x8) AddMasked(y Float64x8, z Mask64x8) Float64x8 + +// Add adds corresponding elements of two vectors. 
+// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Int8x16) AddMasked(y Int8x16, z Mask8x16) Int8x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Int8x32) AddMasked(y Int8x32, z Mask8x32) Int8x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Int8x64) AddMasked(y Int8x64, z Mask8x64) Int8x64 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Int16x8) AddMasked(y Int16x8, z Mask16x8) Int16x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Int16x16) AddMasked(y Int16x16, z Mask16x16) Int16x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Int16x32) AddMasked(y Int16x32, z Mask16x32) Int16x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Int32x4) AddMasked(y Int32x4, z Mask32x4) Int32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Int32x8) AddMasked(y Int32x8, z Mask32x8) Int32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Int32x16) AddMasked(y Int32x16, z Mask32x16) Int32x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Int64x2) AddMasked(y Int64x2, z Mask64x2) Int64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Int64x4) AddMasked(y Int64x4, z Mask64x4) Int64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Int64x8) AddMasked(y Int64x8, z Mask64x8) Int64x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Uint8x16) AddMasked(y Uint8x16, z Mask8x16) Uint8x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Uint8x32) AddMasked(y Uint8x32, z Mask8x32) Uint8x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512EVEX +func (x Uint8x64) AddMasked(y Uint8x64, z Mask8x64) Uint8x64 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Uint16x8) AddMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Uint16x16) AddMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512EVEX +func (x Uint16x32) AddMasked(y Uint16x32, z Mask16x32) Uint16x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Uint32x4) AddMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Uint32x8) AddMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512EVEX +func (x Uint32x16) AddMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) AddMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// Add adds corresponding elements of two vectors. 
+// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) AddMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) AddMasked(y Uint64x8, z Mask64x8) Uint64x8 + /* AddSub */ // AddSub subtracts even elements and adds odd elements of two vectors. @@ -342,6 +556,68 @@ func (x Uint64x4) And(y Uint64x4) Uint64x4 // Asm: VPANDQ, CPU Feature: AVX512EVEX func (x Uint64x8) And(y Uint64x8) Uint64x8 +/* AndMasked */ + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Int32x4) AndMasked(y Int32x4, z Mask32x4) Int32x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Int32x8) AndMasked(y Int32x8, z Mask32x8) Int32x8 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Int32x16) AndMasked(y Int32x16, z Mask32x16) Int32x16 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Int64x2) AndMasked(y Int64x2, z Mask64x2) Int64x2 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Int64x4) AndMasked(y Int64x4, z Mask64x4) Int64x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Int64x8) AndMasked(y Int64x8, z Mask64x8) Int64x8 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Uint32x4) AndMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Uint32x8) AndMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512EVEX +func (x Uint32x16) AndMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) AndMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) AndMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// And performs a masked bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) AndMasked(y Uint64x8, z Mask64x8) Uint64x8 + /* AndNot */ // AndNot performs a bitwise AND NOT operation between two vectors. @@ -444,41 +720,135 @@ func (x Uint64x4) AndNot(y Uint64x4) Uint64x4 // Asm: VPANDNQ, CPU Feature: AVX512EVEX func (x Uint64x8) AndNot(y Uint64x8) Uint64x8 -/* ApproximateReciprocal */ +/* AndNotMasked */ -// ApproximateReciprocal computes an approximate reciprocal of each element. +// AndNot performs a masked bitwise AND NOT operation between two vectors. // -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x4) ApproximateReciprocal() Float32x4 +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Int32x4) AndNotMasked(y Int32x4, z Mask32x4) Int32x4 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// AndNot performs a masked bitwise AND NOT operation between two vectors. 
// -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x8) ApproximateReciprocal() Float32x8 +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Int32x8) AndNotMasked(y Int32x8, z Mask32x8) Int32x8 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// AndNot performs a masked bitwise AND NOT operation between two vectors. // -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x16) ApproximateReciprocal() Float32x16 +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Int32x16) AndNotMasked(y Int32x16, z Mask32x16) Int32x16 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// AndNot performs a masked bitwise AND NOT operation between two vectors. // -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x2) ApproximateReciprocal() Float64x2 +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Int64x2) AndNotMasked(y Int64x2, z Mask64x2) Int64x2 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// AndNot performs a masked bitwise AND NOT operation between two vectors. // -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x4) ApproximateReciprocal() Float64x4 +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Int64x4) AndNotMasked(y Int64x4, z Mask64x4) Int64x4 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// AndNot performs a masked bitwise AND NOT operation between two vectors. // -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x8) ApproximateReciprocal() Float64x8 +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Int64x8) AndNotMasked(y Int64x8, z Mask64x8) Int64x8 -/* ApproximateReciprocalOfSqrt */ +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Uint32x4) AndNotMasked(y Uint32x4, z Mask32x4) Uint32x4 -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Uint32x8) AndNotMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDND, CPU Feature: AVX512EVEX +func (x Uint32x16) AndNotMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Uint64x2) AndNotMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Uint64x4) AndNotMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// AndNot performs a masked bitwise AND NOT operation between two vectors. +// +// Asm: VPANDNQ, CPU Feature: AVX512EVEX +func (x Uint64x8) AndNotMasked(y Uint64x8, z Mask64x8) Uint64x8 + +/* ApproximateReciprocal */ + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x4) ApproximateReciprocal() Float32x4 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x8) ApproximateReciprocal() Float32x8 + +// ApproximateReciprocal computes an approximate reciprocal of each element. 
+// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x16) ApproximateReciprocal() Float32x16 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x2) ApproximateReciprocal() Float64x2 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x4) ApproximateReciprocal() Float64x4 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x8) ApproximateReciprocal() Float64x8 + +/* ApproximateReciprocalMasked */ + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x4) ApproximateReciprocalMasked(y Mask32x4) Float32x4 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x8) ApproximateReciprocalMasked(y Mask32x8) Float32x8 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512EVEX +func (x Float32x16) ApproximateReciprocalMasked(y Mask32x16) Float32x16 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x2) ApproximateReciprocalMasked(y Mask64x2) Float64x2 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x4) ApproximateReciprocalMasked(y Mask64x4) Float64x4 + +// ApproximateReciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512EVEX +func (x Float64x8) ApproximateReciprocalMasked(y Mask64x8) Float64x8 + +/* ApproximateReciprocalOfSqrt */ + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. // // Asm: VRSQRTPS, CPU Feature: AVX func (x Float32x4) ApproximateReciprocalOfSqrt() Float32x4 @@ -508,6 +878,38 @@ func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4 // Asm: VRSQRT14PD, CPU Feature: AVX512EVEX func (x Float64x8) ApproximateReciprocalOfSqrt() Float64x8 +/* ApproximateReciprocalOfSqrtMasked */ + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX +func (x Float32x4) ApproximateReciprocalOfSqrtMasked(y Mask32x4) Float32x4 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX +func (x Float32x8) ApproximateReciprocalOfSqrtMasked(y Mask32x8) Float32x8 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX +func (x Float32x16) ApproximateReciprocalOfSqrtMasked(y Mask32x16) Float32x16 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX +func (x Float64x2) ApproximateReciprocalOfSqrtMasked(y Mask64x2) Float64x2 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. 
+// +// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX +func (x Float64x4) ApproximateReciprocalOfSqrtMasked(y Mask64x4) Float64x4 + +// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX +func (x Float64x8) ApproximateReciprocalOfSqrtMasked(y Mask64x8) Float64x8 + /* Average */ // Average computes the rounded average of corresponding elements. @@ -540,6 +942,38 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16 // Asm: VPAVGW, CPU Feature: AVX512EVEX func (x Uint16x32) Average(y Uint16x32) Uint16x32 +/* AverageMasked */ + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX512EVEX +func (x Uint8x16) AverageMasked(y Uint8x16, z Mask8x16) Uint8x16 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX512EVEX +func (x Uint8x32) AverageMasked(y Uint8x32, z Mask8x32) Uint8x32 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX512EVEX +func (x Uint8x64) AverageMasked(y Uint8x64, z Mask8x64) Uint8x64 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX512EVEX +func (x Uint16x8) AverageMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX512EVEX +func (x Uint16x16) AverageMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX512EVEX +func (x Uint16x32) AverageMasked(y Uint16x32, z Mask16x32) Uint16x32 + /* Ceil */ // Ceil rounds elements up to the nearest integer. @@ -594,6 +1028,38 @@ func (x Float64x4) CeilWithPrecision(imm uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) CeilWithPrecision(imm uint8) Float64x8 +/* CeilWithPrecisionMasked */ + +// CeilWithPrecision rounds elements up with specified precision, masked. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x4) CeilWithPrecisionMasked(imm uint8, y Mask32x4) Float32x4 + +// CeilWithPrecision rounds elements up with specified precision, masked. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x8) CeilWithPrecisionMasked(imm uint8, y Mask32x8) Float32x8 + +// CeilWithPrecision rounds elements up with specified precision, masked. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) CeilWithPrecisionMasked(imm uint8, y Mask32x16) Float32x16 + +// CeilWithPrecision rounds elements up with specified precision, masked. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x2) CeilWithPrecisionMasked(imm uint8, y Mask64x2) Float64x2 + +// CeilWithPrecision rounds elements up with specified precision, masked. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x4) CeilWithPrecisionMasked(imm uint8, y Mask64x4) Float64x4 + +// CeilWithPrecision rounds elements up with specified precision, masked. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x8) CeilWithPrecisionMasked(imm uint8, y Mask64x8) Float64x8 + /* DiffWithCeilWithPrecision */ // DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. 
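Usage illustration for the new $(OP)Masked spelling — a minimal sketch, not an API introduced by this change. It assumes the experimental simd package (GOEXPERIMENT=simd, import path "simd" as in this file) and an AVX-512 capable CPU; maskedAddSketch is a hypothetical helper.

	package example

	import "simd" // experimental package; import path assumed

	// maskedAddSketch shows the renamed calling pattern: the mask is the
	// trailing operand, built by an ordinary comparison such as Less.
	func maskedAddSketch(a, b, c simd.Float64x4) simd.Float64x4 {
		m := a.Less(b)           // Mask64x4 selecting lanes where a < b
		return a.AddMasked(c, m) // see Float64x4.AddMasked earlier in this file
	}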
@@ -626,6 +1092,38 @@ func (x Float64x4) DiffWithCeilWithPrecision(imm uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithCeilWithPrecision(imm uint8) Float64x8 +/* DiffWithCeilWithPrecisionMasked */ + +// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x4) DiffWithCeilWithPrecisionMasked(imm uint8, y Mask32x4) Float32x4 + +// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x8) DiffWithCeilWithPrecisionMasked(imm uint8, y Mask32x8) Float32x8 + +// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithCeilWithPrecisionMasked(imm uint8, y Mask32x16) Float32x16 + +// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x2) DiffWithCeilWithPrecisionMasked(imm uint8, y Mask64x2) Float64x2 + +// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x4) DiffWithCeilWithPrecisionMasked(imm uint8, y Mask64x4) Float64x4 + +// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x8) DiffWithCeilWithPrecisionMasked(imm uint8, y Mask64x8) Float64x8 + /* DiffWithFloorWithPrecision */ // DiffWithFloorWithPrecision computes the difference after flooring with specified precision. @@ -658,6 +1156,38 @@ func (x Float64x4) DiffWithFloorWithPrecision(imm uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithFloorWithPrecision(imm uint8) Float64x8 +/* DiffWithFloorWithPrecisionMasked */ + +// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x4) DiffWithFloorWithPrecisionMasked(imm uint8, y Mask32x4) Float32x4 + +// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x8) DiffWithFloorWithPrecisionMasked(imm uint8, y Mask32x8) Float32x8 + +// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithFloorWithPrecisionMasked(imm uint8, y Mask32x16) Float32x16 + +// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x2) DiffWithFloorWithPrecisionMasked(imm uint8, y Mask64x2) Float64x2 + +// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x4) DiffWithFloorWithPrecisionMasked(imm uint8, y Mask64x4) Float64x4 + +// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x8) DiffWithFloorWithPrecisionMasked(imm uint8, y Mask64x8) Float64x8 + /* DiffWithRoundWithPrecision */ // DiffWithRoundWithPrecision computes the difference after rounding with specified precision. 
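The *WithPrecisionMasked variants in the surrounding hunks keep one operand order: the precision immediate first, the mask last. A minimal sketch under the same assumptions as above; ceilClampSketch is hypothetical, and the immediate is assumed to select the number of fraction bits kept (per the VRNDSCALEPS encoding).

	// ceilClampSketch applies CeilWithPrecisionMasked only on lanes selected by
	// an unmasked compare; unselected lanes follow the masked instruction's
	// semantics (not specified here).
	func ceilClampSketch(x, limit simd.Float32x4) simd.Float32x4 {
		m := x.Less(limit)                     // Mask32x4 from an unmasked compare
		return x.CeilWithPrecisionMasked(2, m) // immediate first, mask last
	}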
@@ -690,6 +1220,38 @@ func (x Float64x4) DiffWithRoundWithPrecision(imm uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithRoundWithPrecision(imm uint8) Float64x8 +/* DiffWithRoundWithPrecisionMasked */ + +// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x4) DiffWithRoundWithPrecisionMasked(imm uint8, y Mask32x4) Float32x4 + +// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x8) DiffWithRoundWithPrecisionMasked(imm uint8, y Mask32x8) Float32x8 + +// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithRoundWithPrecisionMasked(imm uint8, y Mask32x16) Float32x16 + +// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x2) DiffWithRoundWithPrecisionMasked(imm uint8, y Mask64x2) Float64x2 + +// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x4) DiffWithRoundWithPrecisionMasked(imm uint8, y Mask64x4) Float64x4 + +// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x8) DiffWithRoundWithPrecisionMasked(imm uint8, y Mask64x8) Float64x8 + /* DiffWithTruncWithPrecision */ // DiffWithTruncWithPrecision computes the difference after truncating with specified precision. @@ -722,6 +1284,38 @@ func (x Float64x4) DiffWithTruncWithPrecision(imm uint8) Float64x4 // Asm: VREDUCEPD, CPU Feature: AVX512EVEX func (x Float64x8) DiffWithTruncWithPrecision(imm uint8) Float64x8 +/* DiffWithTruncWithPrecisionMasked */ + +// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x4) DiffWithTruncWithPrecisionMasked(imm uint8, y Mask32x4) Float32x4 + +// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x8) DiffWithTruncWithPrecisionMasked(imm uint8, y Mask32x8) Float32x8 + +// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. +// +// Asm: VREDUCEPS, CPU Feature: AVX512EVEX +func (x Float32x16) DiffWithTruncWithPrecisionMasked(imm uint8, y Mask32x16) Float32x16 + +// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x2) DiffWithTruncWithPrecisionMasked(imm uint8, y Mask64x2) Float64x2 + +// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x4) DiffWithTruncWithPrecisionMasked(imm uint8, y Mask64x4) Float64x4 + +// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. +// +// Asm: VREDUCEPD, CPU Feature: AVX512EVEX +func (x Float64x8) DiffWithTruncWithPrecisionMasked(imm uint8, y Mask64x8) Float64x8 + /* Div */ // Div divides elements of two vectors. 
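DivMasked, added in the next hunk, composes the same way; a sketch of gating the division on a lanewise compare. guardedDivSketch is hypothetical, and the zero value of a vector type is assumed to be all-zero lanes.

	// guardedDivSketch divides only where the divisor is strictly positive;
	// unselected lanes follow the masked VDIVPD semantics.
	func guardedDivSketch(num, den simd.Float64x4) simd.Float64x4 {
		var zero simd.Float64x4 // assumed: zero value == all-zero lanes
		ok := zero.Less(den)    // Mask64x4: lanes where 0 < den
		return num.DivMasked(den, ok)
	}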
@@ -754,9 +1348,41 @@ func (x Float64x4) Div(y Float64x4) Float64x4 // Asm: VDIVPD, CPU Feature: AVX512EVEX func (x Float64x8) Div(y Float64x8) Float64x8 -/* DotProdBroadcast */ +/* DivMasked */ -// DotProdBroadcast multiplies all elements and broadcasts the sum. +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX512EVEX +func (x Float32x4) DivMasked(y Float32x4, z Mask32x4) Float32x4 + +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX512EVEX +func (x Float32x8) DivMasked(y Float32x8, z Mask32x8) Float32x8 + +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX512EVEX +func (x Float32x16) DivMasked(y Float32x16, z Mask32x16) Float32x16 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX512EVEX +func (x Float64x2) DivMasked(y Float64x2, z Mask64x2) Float64x2 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX512EVEX +func (x Float64x4) DivMasked(y Float64x4, z Mask64x4) Float64x4 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX512EVEX +func (x Float64x8) DivMasked(y Float64x8, z Mask64x8) Float64x8 + +/* DotProdBroadcast */ + +// DotProdBroadcast multiplies all elements and broadcasts the sum. // // Asm: VDPPD, CPU Feature: AVX func (x Float64x2) DotProdBroadcast(y Float64x2) Float64x2 @@ -913,6 +1539,158 @@ func (x Uint64x4) Equal(y Uint64x4) Mask64x4 // Asm: VPCMPUQ, CPU Feature: AVX512EVEX func (x Uint64x8) Equal(y Uint64x8) Mask64x8 +/* EqualMasked */ + +// Equal compares for equality, masked. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) EqualMasked(y Float32x4, z Mask32x4) Mask32x4 + +// Equal compares for equality, masked. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) EqualMasked(y Float32x8, z Mask32x8) Mask32x8 + +// Equal compares for equality, masked. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) EqualMasked(y Float32x16, z Mask32x16) Mask32x16 + +// Equal compares for equality, masked. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) EqualMasked(y Float64x2, z Mask64x2) Mask64x2 + +// Equal compares for equality, masked. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) EqualMasked(y Float64x4, z Mask64x4) Mask64x4 + +// Equal compares for equality, masked. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) EqualMasked(y Float64x8, z Mask64x8) Mask64x8 + +// Equal compares for equality, masked. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) EqualMasked(y Int8x16, z Mask8x16) Mask8x16 + +// Equal compares for equality, masked. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) EqualMasked(y Int8x32, z Mask8x32) Mask8x32 + +// Equal compares for equality, masked. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) EqualMasked(y Int8x64, z Mask8x64) Mask8x64 + +// Equal compares for equality, masked. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) EqualMasked(y Int16x8, z Mask16x8) Mask16x8 + +// Equal compares for equality, masked. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) EqualMasked(y Int16x16, z Mask16x16) Mask16x16 + +// Equal compares for equality, masked. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) EqualMasked(y Int16x32, z Mask16x32) Mask16x32 + +// Equal compares for equality, masked. 
+// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) EqualMasked(y Int32x4, z Mask32x4) Mask32x4 + +// Equal compares for equality, masked. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) EqualMasked(y Int32x8, z Mask32x8) Mask32x8 + +// Equal compares for equality, masked. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) EqualMasked(y Int32x16, z Mask32x16) Mask32x16 + +// Equal compares for equality, masked. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) EqualMasked(y Int64x2, z Mask64x2) Mask64x2 + +// Equal compares for equality, masked. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) EqualMasked(y Int64x4, z Mask64x4) Mask64x4 + +// Equal compares for equality, masked. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) EqualMasked(y Int64x8, z Mask64x8) Mask64x8 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) EqualMasked(y Uint8x16, z Mask8x16) Mask8x16 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) EqualMasked(y Uint8x32, z Mask8x32) Mask8x32 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) EqualMasked(y Uint8x64, z Mask8x64) Mask8x64 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) EqualMasked(y Uint16x8, z Mask16x8) Mask16x8 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) EqualMasked(y Uint16x16, z Mask16x16) Mask16x16 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) EqualMasked(y Uint16x32, z Mask16x32) Mask16x32 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) EqualMasked(y Uint32x4, z Mask32x4) Mask32x4 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) EqualMasked(y Uint32x8, z Mask32x8) Mask32x8 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) EqualMasked(y Uint32x16, z Mask32x16) Mask32x16 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) EqualMasked(y Uint64x2, z Mask64x2) Mask64x2 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) EqualMasked(y Uint64x4, z Mask64x4) Mask64x4 + +// Equal compares for equality, masked. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) EqualMasked(y Uint64x8, z Mask64x8) Mask64x8 + /* Floor */ // Floor rounds elements down to the nearest integer. @@ -967,6 +1745,38 @@ func (x Float64x4) FloorWithPrecision(imm uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) FloorWithPrecision(imm uint8) Float64x8 +/* FloorWithPrecisionMasked */ + +// FloorWithPrecision rounds elements down with specified precision, masked. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x4) FloorWithPrecisionMasked(imm uint8, y Mask32x4) Float32x4 + +// FloorWithPrecision rounds elements down with specified precision, masked. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x8) FloorWithPrecisionMasked(imm uint8, y Mask32x8) Float32x8 + +// FloorWithPrecision rounds elements down with specified precision, masked. 
+// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) FloorWithPrecisionMasked(imm uint8, y Mask32x16) Float32x16 + +// FloorWithPrecision rounds elements down with specified precision, masked. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x2) FloorWithPrecisionMasked(imm uint8, y Mask64x2) Float64x2 + +// FloorWithPrecision rounds elements down with specified precision, masked. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x4) FloorWithPrecisionMasked(imm uint8, y Mask64x4) Float64x4 + +// FloorWithPrecision rounds elements down with specified precision, masked. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x8) FloorWithPrecisionMasked(imm uint8, y Mask64x8) Float64x8 + /* FusedMultiplyAdd */ // FusedMultiplyAdd performs `(v1 * v2) + v3`. @@ -999,6 +1809,38 @@ func (x Float64x4) FusedMultiplyAdd(y Float64x4, z Float64x4) Float64x4 // Asm: VFMADD213PD, CPU Feature: AVX512EVEX func (x Float64x8) FusedMultiplyAdd(y Float64x8, z Float64x8) Float64x8 +/* FusedMultiplyAddMasked */ + +// FusedMultiplyAdd performs `(v1 * v2) + v3`. +// +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplyAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplyAdd performs `(v1 * v2) + v3`. +// +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplyAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplyAdd performs `(v1 * v2) + v3`. +// +// Asm: VFMADD213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplyAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplyAdd performs `(v1 * v2) + v3`. +// +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplyAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplyAdd performs `(v1 * v2) + v3`. +// +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplyAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplyAdd performs `(v1 * v2) + v3`. +// +// Asm: VFMADD213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplyAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + /* FusedMultiplyAddSub */ // FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. @@ -1031,6 +1873,38 @@ func (x Float64x4) FusedMultiplyAddSub(y Float64x4, z Float64x4) Float64x4 // Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX func (x Float64x8) FusedMultiplyAddSub(y Float64x8, z Float64x8) Float64x8 +/* FusedMultiplyAddSubMasked */ + +// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplyAddSubMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplyAddSubMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. 
+// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplyAddSubMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplyAddSubMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplyAddSubMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplyAddSubMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + /* FusedMultiplySubAdd */ // FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. @@ -1063,6 +1937,38 @@ func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4 // Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8 +/* FusedMultiplySubAddMasked */ + +// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x4) FusedMultiplySubAddMasked(y Float32x4, z Float32x4, u Mask32x4) Float32x4 + +// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x8) FusedMultiplySubAddMasked(y Float32x8, z Float32x8, u Mask32x8) Float32x8 + +// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX +func (x Float32x16) FusedMultiplySubAddMasked(y Float32x16, z Float32x16, u Mask32x16) Float32x16 + +// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x2) FusedMultiplySubAddMasked(y Float64x2, z Float64x2, u Mask64x2) Float64x2 + +// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x4) FusedMultiplySubAddMasked(y Float64x4, z Float64x4, u Mask64x4) Float64x4 + +// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. 
+// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX +func (x Float64x8) FusedMultiplySubAddMasked(y Float64x8, z Float64x8, u Mask64x8) Float64x8 + /* GaloisFieldAffineTransform */ // GaloisFieldAffineTransform computes an affine transformation in GF(2^8): @@ -1091,7 +1997,7 @@ func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64 /* GaloisFieldAffineTransformInversed */ -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8), +// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -1100,7 +2006,7 @@ func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX func (x Uint8x16) GaloisFieldAffineTransformInversed(y Uint64x2, b uint8) Uint8x16 -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8), +// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -1109,7 +2015,7 @@ func (x Uint8x16) GaloisFieldAffineTransformInversed(y Uint64x2, b uint8) Uint8x // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX func (x Uint8x32) GaloisFieldAffineTransformInversed(y Uint64x4, b uint8) Uint8x32 -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8), +// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), // with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: // x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; // b is an 8-bit vector. The affine transformation is y * x + b, with each element of y @@ -1118,12 +2024,67 @@ func (x Uint8x32) GaloisFieldAffineTransformInversed(y Uint64x4, b uint8) Uint8x // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX func (x Uint8x64) GaloisFieldAffineTransformInversed(y Uint64x8, b uint8) Uint8x64 -/* GaloisFieldMul */ +/* GaloisFieldAffineTransformInversedMasked */ -// GaloisFieldMul computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. +// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), +// with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. // -// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX +// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX +func (x Uint8x16) GaloisFieldAffineTransformInversedMasked(y Uint64x2, b uint8, m Mask8x16) Uint8x16 + +// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), +// with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. 
The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX +func (x Uint8x32) GaloisFieldAffineTransformInversedMasked(y Uint64x4, b uint8, m Mask8x32) Uint8x32 + +// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8), +// with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX +func (x Uint8x64) GaloisFieldAffineTransformInversedMasked(y Uint64x8, b uint8, m Mask8x64) Uint8x64 + +/* GaloisFieldAffineTransformMasked */ + +// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX +func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, m Mask8x16) Uint8x16 + +// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX +func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, m Mask8x32) Uint8x32 + +// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX +func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, m Mask8x64) Uint8x64 + +/* GaloisFieldMul */ + +// GaloisFieldMul computes element-wise GF(2^8) multiplication with +// reduction polynomial x^8 + x^4 + x^3 + x + 1. +// +// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX func (x Uint8x16) GaloisFieldMul(y Uint8x16) Uint8x16 // GaloisFieldMul computes element-wise GF(2^8) multiplication with @@ -1138,6 +2099,26 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32 // Asm: VGF2P8MULB, CPU Feature: AVX512EVEX func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 +/* GaloisFieldMulMasked */ + +// GaloisFieldMul computes element-wise GF(2^8) multiplication with +// reduction polynomial x^8 + x^4 + x^3 + x + 1. +// +// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX +func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, z Mask8x16) Uint8x16 + +// GaloisFieldMul computes element-wise GF(2^8) multiplication with +// reduction polynomial x^8 + x^4 + x^3 + x + 1. +// +// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX +func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, z Mask8x32) Uint8x32 + +// GaloisFieldMul computes element-wise GF(2^8) multiplication with +// reduction polynomial x^8 + x^4 + x^3 + x + 1. 
+// +// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX +func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, z Mask8x64) Uint8x64 + /* Get128 */ // Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. @@ -1536,4552 +2517,1285 @@ func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 // Asm: VPCMPUQ, CPU Feature: AVX512EVEX func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 -/* IsNan */ - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) IsNan(y Float32x4) Mask32x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) IsNan(y Float32x8) Mask32x8 +/* GreaterEqualMasked */ -// IsNan checks if elements are NaN. Use as x.IsNan(x). +// GreaterEqual compares for greater than or equal. // // Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) IsNan(y Float32x16) Mask32x16 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) IsNan(y Float64x2) Mask64x2 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) IsNan(y Float64x4) Mask64x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) IsNan(y Float64x8) Mask64x8 - -/* Less */ - -// Less compares for less than. -// -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) Less(y Float32x4) Mask32x4 +func (x Float32x4) GreaterEqualMasked(y Float32x4, z Mask32x4) Mask32x4 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) Less(y Float32x8) Mask32x8 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) GreaterEqualMasked(y Float32x8, z Mask32x8) Mask32x8 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) Less(y Float32x16) Mask32x16 +func (x Float32x16) GreaterEqualMasked(y Float32x16, z Mask32x16) Mask32x16 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) Less(y Float64x2) Mask64x2 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) GreaterEqualMasked(y Float64x2, z Mask64x2) Mask64x2 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) Less(y Float64x4) Mask64x4 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) GreaterEqualMasked(y Float64x4, z Mask64x4) Mask64x4 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) Less(y Float64x8) Mask64x8 +func (x Float64x8) GreaterEqualMasked(y Float64x8, z Mask64x8) Mask64x8 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) Less(y Int8x16) Mask8x16 +func (x Int8x16) GreaterEqualMasked(y Int8x16, z Mask8x16) Mask8x16 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) Less(y Int8x32) Mask8x32 +func (x Int8x32) GreaterEqualMasked(y Int8x32, z Mask8x32) Mask8x32 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. 
// // Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) Less(y Int8x64) Mask8x64 +func (x Int8x64) GreaterEqualMasked(y Int8x64, z Mask8x64) Mask8x64 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) Less(y Int16x8) Mask16x8 +func (x Int16x8) GreaterEqualMasked(y Int16x8, z Mask16x8) Mask16x8 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) Less(y Int16x16) Mask16x16 +func (x Int16x16) GreaterEqualMasked(y Int16x16, z Mask16x16) Mask16x16 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) Less(y Int16x32) Mask16x32 +func (x Int16x32) GreaterEqualMasked(y Int16x32, z Mask16x32) Mask16x32 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) Less(y Int32x4) Mask32x4 +func (x Int32x4) GreaterEqualMasked(y Int32x4, z Mask32x4) Mask32x4 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) Less(y Int32x8) Mask32x8 +func (x Int32x8) GreaterEqualMasked(y Int32x8, z Mask32x8) Mask32x8 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) Less(y Int32x16) Mask32x16 +func (x Int32x16) GreaterEqualMasked(y Int32x16, z Mask32x16) Mask32x16 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) Less(y Int64x2) Mask64x2 +func (x Int64x2) GreaterEqualMasked(y Int64x2, z Mask64x2) Mask64x2 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) Less(y Int64x4) Mask64x4 +func (x Int64x4) GreaterEqualMasked(y Int64x4, z Mask64x4) Mask64x4 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) Less(y Int64x8) Mask64x8 +func (x Int64x8) GreaterEqualMasked(y Int64x8, z Mask64x8) Mask64x8 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) Less(y Uint8x16) Mask8x16 +func (x Uint8x16) GreaterEqualMasked(y Uint8x16, z Mask8x16) Mask8x16 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) Less(y Uint8x32) Mask8x32 +func (x Uint8x32) GreaterEqualMasked(y Uint8x32, z Mask8x32) Mask8x32 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) Less(y Uint8x64) Mask8x64 +func (x Uint8x64) GreaterEqualMasked(y Uint8x64, z Mask8x64) Mask8x64 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) Less(y Uint16x8) Mask16x8 +func (x Uint16x8) GreaterEqualMasked(y Uint16x8, z Mask16x8) Mask16x8 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. 
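A short sketch (not part of this CL) of the new compare-under-mask spelling; per the usual AVX-512 masking behavior, lanes cleared in m come back false in the result mask.

package simdsketch

import "simd"

// nonNegativeMasked reports, per lane, whether x >= floor, restricted to the
// lanes enabled in m, using the renamed GreaterEqualMasked method.
func nonNegativeMasked(x, floor simd.Int32x8, m simd.Mask32x8) simd.Mask32x8 {
	return x.GreaterEqualMasked(floor, m) // previously x.MaskedGreaterEqual(floor, m)
}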
// // Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) Less(y Uint16x16) Mask16x16 +func (x Uint16x16) GreaterEqualMasked(y Uint16x16, z Mask16x16) Mask16x16 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) Less(y Uint16x32) Mask16x32 +func (x Uint16x32) GreaterEqualMasked(y Uint16x32, z Mask16x32) Mask16x32 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) Less(y Uint32x4) Mask32x4 +func (x Uint32x4) GreaterEqualMasked(y Uint32x4, z Mask32x4) Mask32x4 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) Less(y Uint32x8) Mask32x8 +func (x Uint32x8) GreaterEqualMasked(y Uint32x8, z Mask32x8) Mask32x8 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) Less(y Uint32x16) Mask32x16 +func (x Uint32x16) GreaterEqualMasked(y Uint32x16, z Mask32x16) Mask32x16 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) Less(y Uint64x2) Mask64x2 +func (x Uint64x2) GreaterEqualMasked(y Uint64x2, z Mask64x2) Mask64x2 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) Less(y Uint64x4) Mask64x4 +func (x Uint64x4) GreaterEqualMasked(y Uint64x4, z Mask64x4) Mask64x4 -// Less compares for less than. +// GreaterEqual compares for greater than or equal. // // Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Less(y Uint64x8) Mask64x8 +func (x Uint64x8) GreaterEqualMasked(y Uint64x8, z Mask64x8) Mask64x8 -/* LessEqual */ +/* GreaterMasked */ -// LessEqual compares for less than or equal. +// Greater compares for greater than. // -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x4) LessEqual(y Float32x4) Mask32x4 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) GreaterMasked(y Float32x4, z Mask32x4) Mask32x4 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // -// Asm: VCMPPS, CPU Feature: AVX -func (x Float32x8) LessEqual(y Float32x8) Mask32x8 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) GreaterMasked(y Float32x8, z Mask32x8) Mask32x8 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) LessEqual(y Float32x16) Mask32x16 +func (x Float32x16) GreaterMasked(y Float32x16, z Mask32x16) Mask32x16 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x2) LessEqual(y Float64x2) Mask64x2 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) GreaterMasked(y Float64x2, z Mask64x2) Mask64x2 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // -// Asm: VCMPPD, CPU Feature: AVX -func (x Float64x4) LessEqual(y Float64x4) Mask64x4 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) GreaterMasked(y Float64x4, z Mask64x4) Mask64x4 -// LessEqual compares for less than or equal. +// Greater compares for greater than. 
// // Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) LessEqual(y Float64x8) Mask64x8 +func (x Float64x8) GreaterMasked(y Float64x8, z Mask64x8) Mask64x8 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) LessEqual(y Int8x16) Mask8x16 +func (x Int8x16) GreaterMasked(y Int8x16, z Mask8x16) Mask8x16 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) LessEqual(y Int8x32) Mask8x32 +func (x Int8x32) GreaterMasked(y Int8x32, z Mask8x32) Mask8x32 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) LessEqual(y Int8x64) Mask8x64 +func (x Int8x64) GreaterMasked(y Int8x64, z Mask8x64) Mask8x64 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) LessEqual(y Int16x8) Mask16x8 +func (x Int16x8) GreaterMasked(y Int16x8, z Mask16x8) Mask16x8 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) LessEqual(y Int16x16) Mask16x16 +func (x Int16x16) GreaterMasked(y Int16x16, z Mask16x16) Mask16x16 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) LessEqual(y Int16x32) Mask16x32 +func (x Int16x32) GreaterMasked(y Int16x32, z Mask16x32) Mask16x32 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) LessEqual(y Int32x4) Mask32x4 +func (x Int32x4) GreaterMasked(y Int32x4, z Mask32x4) Mask32x4 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) LessEqual(y Int32x8) Mask32x8 +func (x Int32x8) GreaterMasked(y Int32x8, z Mask32x8) Mask32x8 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) LessEqual(y Int32x16) Mask32x16 +func (x Int32x16) GreaterMasked(y Int32x16, z Mask32x16) Mask32x16 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) LessEqual(y Int64x2) Mask64x2 +func (x Int64x2) GreaterMasked(y Int64x2, z Mask64x2) Mask64x2 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) LessEqual(y Int64x4) Mask64x4 +func (x Int64x4) GreaterMasked(y Int64x4, z Mask64x4) Mask64x4 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) LessEqual(y Int64x8) Mask64x8 +func (x Int64x8) GreaterMasked(y Int64x8, z Mask64x8) Mask64x8 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 +func (x Uint8x16) GreaterMasked(y Uint8x16, z Mask8x16) Mask8x16 -// LessEqual compares for less than or equal. +// Greater compares for greater than. 
// // Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 +func (x Uint8x32) GreaterMasked(y Uint8x32, z Mask8x32) Mask8x32 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64 +func (x Uint8x64) GreaterMasked(y Uint8x64, z Mask8x64) Mask8x64 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 +func (x Uint16x8) GreaterMasked(y Uint16x8, z Mask16x8) Mask16x8 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 +func (x Uint16x16) GreaterMasked(y Uint16x16, z Mask16x16) Mask16x16 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32 +func (x Uint16x32) GreaterMasked(y Uint16x32, z Mask16x32) Mask16x32 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 +func (x Uint32x4) GreaterMasked(y Uint32x4, z Mask32x4) Mask32x4 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 +func (x Uint32x8) GreaterMasked(y Uint32x8, z Mask32x8) Mask32x8 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 +func (x Uint32x16) GreaterMasked(y Uint32x16, z Mask32x16) Mask32x16 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 +func (x Uint64x2) GreaterMasked(y Uint64x2, z Mask64x2) Mask64x2 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 +func (x Uint64x4) GreaterMasked(y Uint64x4, z Mask64x4) Mask64x4 -// LessEqual compares for less than or equal. +// Greater compares for greater than. // // Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 +func (x Uint64x8) GreaterMasked(y Uint64x8, z Mask64x8) Mask64x8 -/* MaskedAbsolute */ +/* IsNan */ -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedAbsolute(y Mask8x16) Int8x16 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) IsNan(y Float32x4) Mask32x4 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedAbsolute(y Mask8x32) Int8x32 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) IsNan(y Float32x8) Mask32x8 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). 
// -// Asm: VPABSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedAbsolute(y Mask8x64) Int8x64 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) IsNan(y Float32x16) Mask32x16 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedAbsolute(y Mask16x8) Int16x8 +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) IsNan(y Float64x2) Mask64x2 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedAbsolute(y Mask16x16) Int16x16 +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) IsNan(y Float64x4) Mask64x4 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedAbsolute(y Mask16x32) Int16x32 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) IsNan(y Float64x8) Mask64x8 -// Absolute computes the absolute value of each element. +/* IsNanMasked */ + +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedAbsolute(y Mask32x4) Int32x4 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) IsNanMasked(y Float32x4, z Mask32x4) Mask32x4 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedAbsolute(y Mask32x8) Int32x8 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) IsNanMasked(y Float32x8, z Mask32x8) Mask32x8 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedAbsolute(y Mask32x16) Int32x16 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) IsNanMasked(y Float32x16, z Mask32x16) Mask32x16 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedAbsolute(y Mask64x2) Int64x2 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) IsNanMasked(y Float64x2, z Mask64x2) Mask64x2 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedAbsolute(y Mask64x4) Int64x4 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) IsNanMasked(y Float64x4, z Mask64x4) Mask64x4 -// Absolute computes the absolute value of each element. +// IsNan checks if elements are NaN. Use as x.IsNan(x). // -// Asm: VPABSQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedAbsolute(y Mask64x8) Int64x8 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) IsNanMasked(y Float64x8, z Mask64x8) Mask64x8 -/* MaskedAdd */ +/* Less */ -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedAdd(y Float32x4, z Mask32x4) Float32x4 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) Less(y Float32x4) Mask32x4 -// Add adds corresponding elements of two vectors. +// Less compares for less than. 
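The x.IsNan(x) calling idiom noted in the docs above carries over to the masked form; a small sketch (not part of this CL):

package simdsketch

import "simd"

// nanLanes returns a mask of the NaN lanes of x, restricted to the lanes
// already enabled in m, following the x.IsNan(x) idiom.
func nanLanes(x simd.Float64x8, m simd.Mask64x8) simd.Mask64x8 {
	return x.IsNanMasked(x, m) // previously x.MaskedIsNan(x, m)
}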
// -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedAdd(y Float32x8, z Mask32x8) Float32x8 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) Less(y Float32x8) Mask32x8 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VADDPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedAdd(y Float32x16, z Mask32x16) Float32x16 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) Less(y Float32x16) Mask32x16 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VADDPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedAdd(y Float64x2, z Mask64x2) Float64x2 +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) Less(y Float64x2) Mask64x2 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VADDPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedAdd(y Float64x4, z Mask64x4) Float64x4 +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) Less(y Float64x4) Mask64x4 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VADDPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedAdd(y Float64x8, z Mask64x8) Float64x8 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) Less(y Float64x8) Mask64x8 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedAdd(y Int8x16, z Mask8x16) Int8x16 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) Less(y Int8x16) Mask8x16 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedAdd(y Int8x32, z Mask8x32) Int8x32 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) Less(y Int8x32) Mask8x32 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedAdd(y Int8x64, z Mask8x64) Int8x64 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) Less(y Int8x64) Mask8x64 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedAdd(y Int16x8, z Mask16x8) Int16x8 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) Less(y Int16x8) Mask16x8 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedAdd(y Int16x16, z Mask16x16) Int16x16 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) Less(y Int16x16) Mask16x16 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedAdd(y Int16x32, z Mask16x32) Int16x32 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) Less(y Int16x32) Mask16x32 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedAdd(y Int32x4, z Mask32x4) Int32x4 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) Less(y Int32x4) Mask32x4 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedAdd(y Int32x8, z Mask32x8) Int32x8 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) Less(y Int32x8) Mask32x8 -// Add adds corresponding elements of two vectors. 
+// Less compares for less than. // -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedAdd(y Int32x16, z Mask32x16) Int32x16 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) Less(y Int32x16) Mask32x16 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedAdd(y Int64x2, z Mask64x2) Int64x2 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) Less(y Int64x2) Mask64x2 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedAdd(y Int64x4, z Mask64x4) Int64x4 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) Less(y Int64x4) Mask64x4 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedAdd(y Int64x8, z Mask64x8) Int64x8 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) Less(y Int64x8) Mask64x8 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedAdd(y Uint8x16, z Mask8x16) Uint8x16 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) Less(y Uint8x16) Mask8x16 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedAdd(y Uint8x32, z Mask8x32) Uint8x32 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) Less(y Uint8x32) Mask8x32 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedAdd(y Uint8x64, z Mask8x64) Uint8x64 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) Less(y Uint8x64) Mask8x64 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedAdd(y Uint16x8, z Mask16x8) Uint16x8 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) Less(y Uint16x8) Mask16x8 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedAdd(y Uint16x16, z Mask16x16) Uint16x16 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) Less(y Uint16x16) Mask16x16 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedAdd(y Uint16x32, z Mask16x32) Uint16x32 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) Less(y Uint16x32) Mask16x32 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedAdd(y Uint32x4, z Mask32x4) Uint32x4 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) Less(y Uint32x4) Mask32x4 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedAdd(y Uint32x8, z Mask32x8) Uint32x8 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) Less(y Uint32x8) Mask32x8 -// Add adds corresponding elements of two vectors. +// Less compares for less than. 
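The lines being deleted here are the old Masked$OP spellings; the $(OP)Masked replacements live in earlier hunks of this file. A before/after sketch (not part of this CL, and assuming the renamed AddMasked declaration added elsewhere in this CL keeps the same signature):

package simdsketch

import "simd"

// sumWhere adds y into x only in the lanes enabled by m.
// Before this CL: x.MaskedAdd(y, m). After this CL: x.AddMasked(y, m).
func sumWhere(x, y simd.Uint32x8, m simd.Mask32x8) simd.Uint32x8 {
	return x.AddMasked(y, m)
}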
// -// Asm: VPADDD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedAdd(y Uint32x16, z Mask32x16) Uint32x16 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) Less(y Uint32x16) Mask32x16 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedAdd(y Uint64x2, z Mask64x2) Uint64x2 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) Less(y Uint64x2) Mask64x2 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedAdd(y Uint64x4, z Mask64x4) Uint64x4 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) Less(y Uint64x4) Mask64x4 -// Add adds corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPADDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedAdd(y Uint64x8, z Mask64x8) Uint64x8 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Less(y Uint64x8) Mask64x8 -/* MaskedAnd */ +/* LessEqual */ -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedAnd(y Int32x4, z Mask32x4) Int32x4 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) LessEqual(y Float32x4) Mask32x4 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedAnd(y Int32x8, z Mask32x8) Int32x8 +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) LessEqual(y Float32x8) Mask32x8 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedAnd(y Int32x16, z Mask32x16) Int32x16 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) LessEqual(y Float32x16) Mask32x16 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedAnd(y Int64x2, z Mask64x2) Int64x2 +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) LessEqual(y Float64x2) Mask64x2 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedAnd(y Int64x4, z Mask64x4) Int64x4 +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) LessEqual(y Float64x4) Mask64x4 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedAnd(y Int64x8, z Mask64x8) Int64x8 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) LessEqual(y Float64x8) Mask64x8 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedAnd(y Uint32x4, z Mask32x4) Uint32x4 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) LessEqual(y Int8x16) Mask8x16 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. 
// -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedAnd(y Uint32x8, z Mask32x8) Uint32x8 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) LessEqual(y Int8x32) Mask8x32 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedAnd(y Uint32x16, z Mask32x16) Uint32x16 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) LessEqual(y Int8x64) Mask8x64 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedAnd(y Uint64x2, z Mask64x2) Uint64x2 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) LessEqual(y Int16x8) Mask16x8 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedAnd(y Uint64x4, z Mask64x4) Uint64x4 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) LessEqual(y Int16x16) Mask16x16 -// And performs a masked bitwise AND operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedAnd(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedAndNot */ +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) LessEqual(y Int16x32) Mask16x32 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedAndNot(y Int32x4, z Mask32x4) Int32x4 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) LessEqual(y Int32x4) Mask32x4 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedAndNot(y Int32x8, z Mask32x8) Int32x8 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) LessEqual(y Int32x8) Mask32x8 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedAndNot(y Int32x16, z Mask32x16) Int32x16 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) LessEqual(y Int32x16) Mask32x16 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedAndNot(y Int64x2, z Mask64x2) Int64x2 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) LessEqual(y Int64x2) Mask64x2 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedAndNot(y Int64x4, z Mask64x4) Int64x4 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) LessEqual(y Int64x4) Mask64x4 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedAndNot(y Int64x8, z Mask64x8) Int64x8 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) LessEqual(y Int64x8) Mask64x8 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. 
// -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedAndNot(y Uint32x4, z Mask32x4) Uint32x4 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedAndNot(y Uint32x8, z Mask32x8) Uint32x8 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDND, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedAndNot(y Uint32x16, z Mask32x16) Uint32x16 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedAndNot(y Uint64x2, z Mask64x2) Uint64x2 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedAndNot(y Uint64x4, z Mask64x4) Uint64x4 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 -// AndNot performs a masked bitwise AND NOT operation between two vectors. +// LessEqual compares for less than or equal. // -// Asm: VPANDNQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedAndNot(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedApproximateReciprocal */ +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// LessEqual compares for less than or equal. // -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedApproximateReciprocal(y Mask32x4) Float32x4 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// LessEqual compares for less than or equal. // -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedApproximateReciprocal(y Mask32x8) Float32x8 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// LessEqual compares for less than or equal. // -// Asm: VRCP14PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedApproximateReciprocal(y Mask32x16) Float32x16 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// LessEqual compares for less than or equal. // -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedApproximateReciprocal(y Mask64x2) Float64x2 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// LessEqual compares for less than or equal. 
// -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedApproximateReciprocal(y Mask64x4) Float64x4 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 -// ApproximateReciprocal computes an approximate reciprocal of each element. +// LessEqual compares for less than or equal. // -// Asm: VRCP14PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedApproximateReciprocal(y Mask64x8) Float64x8 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 -/* MaskedApproximateReciprocalOfSqrt */ +/* LessEqualMasked */ -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// LessEqual compares for less than or equal. // -// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedApproximateReciprocalOfSqrt(y Mask32x4) Float32x4 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) LessEqualMasked(y Float32x4, z Mask32x4) Mask32x4 -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// LessEqual compares for less than or equal. // -// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedApproximateReciprocalOfSqrt(y Mask32x8) Float32x8 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) LessEqualMasked(y Float32x8, z Mask32x8) Mask32x8 -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// LessEqual compares for less than or equal. // -// Asm: VRSQRT14PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedApproximateReciprocalOfSqrt(y Mask32x16) Float32x16 +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) LessEqualMasked(y Float32x16, z Mask32x16) Mask32x16 -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// LessEqual compares for less than or equal. // -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedApproximateReciprocalOfSqrt(y Mask64x2) Float64x2 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) LessEqualMasked(y Float64x2, z Mask64x2) Mask64x2 -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// LessEqual compares for less than or equal. // -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedApproximateReciprocalOfSqrt(y Mask64x4) Float64x4 +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) LessEqualMasked(y Float64x4, z Mask64x4) Mask64x4 -// ApproximateReciprocalOfSqrt computes an approximate reciprocal of the square root of each element. +// LessEqual compares for less than or equal. // -// Asm: VRSQRT14PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedApproximateReciprocalOfSqrt(y Mask64x8) Float64x8 - -/* MaskedAverage */ +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) LessEqualMasked(y Float64x8, z Mask64x8) Mask64x8 -// Average computes the rounded average of corresponding elements. +// LessEqual compares for less than or equal. // -// Asm: VPAVGB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedAverage(y Uint8x16, z Mask8x16) Uint8x16 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) LessEqualMasked(y Int8x16, z Mask8x16) Mask8x16 -// Average computes the rounded average of corresponding elements. +// LessEqual compares for less than or equal. 
// -// Asm: VPAVGB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedAverage(y Uint8x32, z Mask8x32) Uint8x32 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) LessEqualMasked(y Int8x32, z Mask8x32) Mask8x32 -// Average computes the rounded average of corresponding elements. +// LessEqual compares for less than or equal. // -// Asm: VPAVGB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedAverage(y Uint8x64, z Mask8x64) Uint8x64 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) LessEqualMasked(y Int8x64, z Mask8x64) Mask8x64 -// Average computes the rounded average of corresponding elements. +// LessEqual compares for less than or equal. // -// Asm: VPAVGW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedAverage(y Uint16x8, z Mask16x8) Uint16x8 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) LessEqualMasked(y Int16x8, z Mask16x8) Mask16x8 -// Average computes the rounded average of corresponding elements. +// LessEqual compares for less than or equal. // -// Asm: VPAVGW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedAverage(y Uint16x16, z Mask16x16) Uint16x16 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) LessEqualMasked(y Int16x16, z Mask16x16) Mask16x16 -// Average computes the rounded average of corresponding elements. +// LessEqual compares for less than or equal. // -// Asm: VPAVGW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedAverage(y Uint16x32, z Mask16x32) Uint16x32 - -/* MaskedCeilWithPrecision */ +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) LessEqualMasked(y Int16x32, z Mask16x32) Mask16x32 -// CeilWithPrecision rounds elements up with specified precision, masked. +// LessEqual compares for less than or equal. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedCeilWithPrecision(imm uint8, y Mask32x4) Float32x4 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) LessEqualMasked(y Int32x4, z Mask32x4) Mask32x4 -// CeilWithPrecision rounds elements up with specified precision, masked. +// LessEqual compares for less than or equal. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedCeilWithPrecision(imm uint8, y Mask32x8) Float32x8 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) LessEqualMasked(y Int32x8, z Mask32x8) Mask32x8 -// CeilWithPrecision rounds elements up with specified precision, masked. +// LessEqual compares for less than or equal. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedCeilWithPrecision(imm uint8, y Mask32x16) Float32x16 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) LessEqualMasked(y Int32x16, z Mask32x16) Mask32x16 -// CeilWithPrecision rounds elements up with specified precision, masked. +// LessEqual compares for less than or equal. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedCeilWithPrecision(imm uint8, y Mask64x2) Float64x2 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) LessEqualMasked(y Int64x2, z Mask64x2) Mask64x2 -// CeilWithPrecision rounds elements up with specified precision, masked. +// LessEqual compares for less than or equal. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedCeilWithPrecision(imm uint8, y Mask64x4) Float64x4 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) LessEqualMasked(y Int64x4, z Mask64x4) Mask64x4 -// CeilWithPrecision rounds elements up with specified precision, masked. +// LessEqual compares for less than or equal. 
// -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedCeilWithPrecision(imm uint8, y Mask64x8) Float64x8 - -/* MaskedDiffWithCeilWithPrecision */ +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) LessEqualMasked(y Int64x8, z Mask64x8) Mask64x8 -// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x4) Float32x4 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) LessEqualMasked(y Uint8x16, z Mask8x16) Mask8x16 -// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x8) Float32x8 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) LessEqualMasked(y Uint8x32, z Mask8x32) Mask8x32 -// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x16) Float32x16 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) LessEqualMasked(y Uint8x64, z Mask8x64) Mask8x64 -// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x2) Float64x2 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) LessEqualMasked(y Uint16x8, z Mask16x8) Mask16x8 -// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x4) Float64x4 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) LessEqualMasked(y Uint16x16, z Mask16x16) Mask16x16 -// DiffWithCeilWithPrecision computes the difference after ceiling with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x8) Float64x8 - -/* MaskedDiffWithFloorWithPrecision */ +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) LessEqualMasked(y Uint16x32, z Mask16x32) Mask16x32 -// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x4) Float32x4 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) LessEqualMasked(y Uint32x4, z Mask32x4) Mask32x4 -// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x8) Float32x8 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) LessEqualMasked(y Uint32x8, z Mask32x8) Mask32x8 -// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// LessEqual compares for less than or equal. 
// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x16) Float32x16 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) LessEqualMasked(y Uint32x16, z Mask32x16) Mask32x16 -// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x2) Float64x2 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) LessEqualMasked(y Uint64x2, z Mask64x2) Mask64x2 -// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. +// LessEqual compares for less than or equal. // -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x4) Float64x4 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) LessEqualMasked(y Uint64x4, z Mask64x4) Mask64x4 -// DiffWithFloorWithPrecision computes the difference after flooring with specified precision. -// -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x8) Float64x8 - -/* MaskedDiffWithRoundWithPrecision */ - -// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x4) Float32x4 - -// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x8) Float32x8 - -// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x16) Float32x16 - -// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. -// -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x2) Float64x2 - -// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. -// -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x4) Float64x4 - -// DiffWithRoundWithPrecision computes the difference after rounding with specified precision. -// -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x8) Float64x8 - -/* MaskedDiffWithTruncWithPrecision */ - -// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x4) Float32x4 - -// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x8) Float32x8 - -// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. -// -// Asm: VREDUCEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x16) Float32x16 - -// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. 
-// -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x2) Float64x2 - -// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. -// -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x4) Float64x4 - -// DiffWithTruncWithPrecision computes the difference after truncating with specified precision. -// -// Asm: VREDUCEPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x8) Float64x8 - -/* MaskedDiv */ - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedDiv(y Float32x4, z Mask32x4) Float32x4 - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedDiv(y Float32x8, z Mask32x8) Float32x8 - -// Div divides elements of two vectors. -// -// Asm: VDIVPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedDiv(y Float32x16, z Mask32x16) Float32x16 - -// Div divides elements of two vectors. -// -// Asm: VDIVPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedDiv(y Float64x2, z Mask64x2) Float64x2 - -// Div divides elements of two vectors. -// -// Asm: VDIVPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedDiv(y Float64x4, z Mask64x4) Float64x4 - -// Div divides elements of two vectors. +// LessEqual compares for less than or equal. // -// Asm: VDIVPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedDiv(y Float64x8, z Mask64x8) Float64x8 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) LessEqualMasked(y Uint64x8, z Mask64x8) Mask64x8 -/* MaskedEqual */ +/* LessMasked */ -// Equal compares for equality, masked. +// Less compares for less than. // // Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedEqual(y Float32x4, z Mask32x4) Mask32x4 +func (x Float32x4) LessMasked(y Float32x4, z Mask32x4) Mask32x4 -// Equal compares for equality, masked. +// Less compares for less than. // // Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedEqual(y Float32x8, z Mask32x8) Mask32x8 +func (x Float32x8) LessMasked(y Float32x8, z Mask32x8) Mask32x8 -// Equal compares for equality, masked. +// Less compares for less than. // // Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedEqual(y Float32x16, z Mask32x16) Mask32x16 +func (x Float32x16) LessMasked(y Float32x16, z Mask32x16) Mask32x16 -// Equal compares for equality, masked. +// Less compares for less than. // // Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedEqual(y Float64x2, z Mask64x2) Mask64x2 +func (x Float64x2) LessMasked(y Float64x2, z Mask64x2) Mask64x2 -// Equal compares for equality, masked. +// Less compares for less than. // // Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedEqual(y Float64x4, z Mask64x4) Mask64x4 +func (x Float64x4) LessMasked(y Float64x4, z Mask64x4) Mask64x4 -// Equal compares for equality, masked. +// Less compares for less than. // // Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedEqual(y Float64x8, z Mask64x8) Mask64x8 - -// Equal compares for equality, masked. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedEqual(y Int8x16, z Mask8x16) Mask8x16 - -// Equal compares for equality, masked. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedEqual(y Int8x32, z Mask8x32) Mask8x32 - -// Equal compares for equality, masked. 
-// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedEqual(y Int8x64, z Mask8x64) Mask8x64 - -// Equal compares for equality, masked. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedEqual(y Int16x8, z Mask16x8) Mask16x8 - -// Equal compares for equality, masked. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedEqual(y Int16x16, z Mask16x16) Mask16x16 - -// Equal compares for equality, masked. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedEqual(y Int16x32, z Mask16x32) Mask16x32 - -// Equal compares for equality, masked. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedEqual(y Int32x4, z Mask32x4) Mask32x4 - -// Equal compares for equality, masked. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedEqual(y Int32x8, z Mask32x8) Mask32x8 - -// Equal compares for equality, masked. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedEqual(y Int32x16, z Mask32x16) Mask32x16 - -// Equal compares for equality, masked. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedEqual(y Int64x2, z Mask64x2) Mask64x2 - -// Equal compares for equality, masked. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedEqual(y Int64x4, z Mask64x4) Mask64x4 - -// Equal compares for equality, masked. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedEqual(y Int64x8, z Mask64x8) Mask64x8 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedEqual(y Uint8x16, z Mask8x16) Mask8x16 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedEqual(y Uint8x32, z Mask8x32) Mask8x32 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedEqual(y Uint8x64, z Mask8x64) Mask8x64 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedEqual(y Uint16x8, z Mask16x8) Mask16x8 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedEqual(y Uint16x16, z Mask16x16) Mask16x16 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedEqual(y Uint16x32, z Mask16x32) Mask16x32 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedEqual(y Uint32x4, z Mask32x4) Mask32x4 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedEqual(y Uint32x8, z Mask32x8) Mask32x8 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedEqual(y Uint32x16, z Mask32x16) Mask32x16 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedEqual(y Uint64x2, z Mask64x2) Mask64x2 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedEqual(y Uint64x4, z Mask64x4) Mask64x4 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedEqual(y Uint64x8, z Mask64x8) Mask64x8 - -/* MaskedFloorWithPrecision */ - -// FloorWithPrecision rounds elements down with specified precision, masked. 
-// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFloorWithPrecision(imm uint8, y Mask32x4) Float32x4 +func (x Float64x8) LessMasked(y Float64x8, z Mask64x8) Mask64x8 -// FloorWithPrecision rounds elements down with specified precision, masked. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFloorWithPrecision(imm uint8, y Mask32x8) Float32x8 - -// FloorWithPrecision rounds elements down with specified precision, masked. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFloorWithPrecision(imm uint8, y Mask32x16) Float32x16 - -// FloorWithPrecision rounds elements down with specified precision, masked. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFloorWithPrecision(imm uint8, y Mask64x2) Float64x2 - -// FloorWithPrecision rounds elements down with specified precision, masked. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFloorWithPrecision(imm uint8, y Mask64x4) Float64x4 - -// FloorWithPrecision rounds elements down with specified precision, masked. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFloorWithPrecision(imm uint8, y Mask64x8) Float64x8 - -/* MaskedFusedMultiplyAdd */ - -// FusedMultiplyAdd performs `(v1 * v2) + v3`. -// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplyAdd(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplyAdd performs `(v1 * v2) + v3`. -// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplyAdd(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplyAdd performs `(v1 * v2) + v3`. -// -// Asm: VFMADD213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplyAdd(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplyAdd performs `(v1 * v2) + v3`. -// -// Asm: VFMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplyAdd(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplyAdd performs `(v1 * v2) + v3`. -// -// Asm: VFMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplyAdd(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplyAdd performs `(v1 * v2) + v3`. -// -// Asm: VFMADD213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplyAdd(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -/* MaskedFusedMultiplyAddSub */ - -// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplyAddSub(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplyAddSub(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplyAddSub(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. 
-// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplyAddSub(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplyAddSub(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplyAddSub performs `(v1 * v2) - v3` for odd-indexed elements, and `(v1 * v2) + v3` for even-indexed elements. -// -// Asm: VFMADDSUB213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplyAddSub(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -/* MaskedFusedMultiplySubAdd */ - -// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedFusedMultiplySubAdd(y Float32x4, z Float32x4, u Mask32x4) Float32x4 - -// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedFusedMultiplySubAdd(y Float32x8, z Float32x8, u Mask32x8) Float32x8 - -// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedFusedMultiplySubAdd(y Float32x16, z Float32x16, u Mask32x16) Float32x16 - -// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedFusedMultiplySubAdd(y Float64x2, z Float64x2, u Mask64x2) Float64x2 - -// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedFusedMultiplySubAdd(y Float64x4, z Float64x4, u Mask64x4) Float64x4 - -// FusedMultiplySubAdd performs `(v1 * v2) + v3` for odd-indexed elements, and `(v1 * v2) - v3` for even-indexed elements. -// -// Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedFusedMultiplySubAdd(y Float64x8, z Float64x8, u Mask64x8) Float64x8 - -/* MaskedGaloisFieldAffineTransform */ - -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGaloisFieldAffineTransform(y Uint64x2, b uint8, m Mask8x16) Uint8x16 - -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. 
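The fused-multiply comments above give the per-lane formula; a scalar sketch of the plain FusedMultiplyAdd lane computation follows (the AddSub/SubAdd variants only flip the sign applied to v3 on alternating lane indices, as described above; masking is not modeled here):

package example

// fmaRef models the per-lane formula (v1*v2)+v3 from the comments above.
// Note that a true fused multiply-add rounds once, whereas this scalar Go
// expression rounds the product before the addition.
func fmaRef(x, y, z []float32) []float32 {
	r := make([]float32, len(x))
	for i := range r {
		r[i] = x[i]*y[i] + z[i]
	}
	return r
}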
-// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGaloisFieldAffineTransform(y Uint64x4, b uint8, m Mask8x32) Uint8x32 - -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGaloisFieldAffineTransform(y Uint64x8, b uint8, m Mask8x64) Uint8x64 - -/* MaskedGaloisFieldAffineTransformInversed */ - -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8), -// with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGaloisFieldAffineTransformInversed(y Uint64x2, b uint8, m Mask8x16) Uint8x16 - -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8), -// with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGaloisFieldAffineTransformInversed(y Uint64x4, b uint8, m Mask8x32) Uint8x32 - -// GaloisFieldAffineTransform computes an affine transformation in GF(2^8), -// with x inversed with reduction polynomial x^8 + x^4 + x^3 + x + 1: -// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; -// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y -// corresponding to a group of 8 elements in x. -// -// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGaloisFieldAffineTransformInversed(y Uint64x8, b uint8, m Mask8x64) Uint8x64 - -/* MaskedGaloisFieldMul */ - -// GaloisFieldMul computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGaloisFieldMul(y Uint8x16, z Mask8x16) Uint8x16 - -// GaloisFieldMul computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGaloisFieldMul(y Uint8x32, z Mask8x32) Uint8x32 - -// GaloisFieldMul computes element-wise GF(2^8) multiplication with -// reduction polynomial x^8 + x^4 + x^3 + x + 1. -// -// Asm: VGF2P8MULB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGaloisFieldMul(y Uint8x64, z Mask8x64) Uint8x64 - -/* MaskedGreater */ - -// Greater compares for greater than. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedGreater(y Float32x4, z Mask32x4) Mask32x4 - -// Greater compares for greater than. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedGreater(y Float32x8, z Mask32x8) Mask32x8 - -// Greater compares for greater than. 
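The GF(2^8) affine-transform comments above describe y * x + b with one 8x8 bit matrix per group of 8 bytes; below is a rough scalar model of that computation for a single byte. The row/bit ordering is an assumption based on the VGF2P8AFFINEQB definition, and masking is not modeled:

package example

import "math/bits"

// gf2p8Affine computes one result byte: each result bit is the GF(2) dot
// product (parity of the AND) of one matrix row with the source byte,
// XORed with the corresponding bit of the constant b.
func gf2p8Affine(matrix uint64, x, b uint8) uint8 {
	var out uint8
	for i := 0; i < 8; i++ {
		row := uint8(matrix >> (8 * (7 - i)))     // matrix row for result bit i (ordering assumed)
		dot := uint8(bits.OnesCount8(row&x) & 1)  // parity = dot product over GF(2)
		out |= (dot ^ (b >> i & 1)) << i
	}
	return out
}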
-// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedGreater(y Float32x16, z Mask32x16) Mask32x16 - -// Greater compares for greater than. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedGreater(y Float64x2, z Mask64x2) Mask64x2 - -// Greater compares for greater than. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedGreater(y Float64x4, z Mask64x4) Mask64x4 - -// Greater compares for greater than. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedGreater(y Float64x8, z Mask64x8) Mask64x8 - -// Greater compares for greater than. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedGreater(y Int8x16, z Mask8x16) Mask8x16 - -// Greater compares for greater than. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedGreater(y Int8x32, z Mask8x32) Mask8x32 - -// Greater compares for greater than. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedGreater(y Int8x64, z Mask8x64) Mask8x64 - -// Greater compares for greater than. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedGreater(y Int16x8, z Mask16x8) Mask16x8 - -// Greater compares for greater than. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedGreater(y Int16x16, z Mask16x16) Mask16x16 - -// Greater compares for greater than. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedGreater(y Int16x32, z Mask16x32) Mask16x32 - -// Greater compares for greater than. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedGreater(y Int32x4, z Mask32x4) Mask32x4 - -// Greater compares for greater than. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedGreater(y Int32x8, z Mask32x8) Mask32x8 - -// Greater compares for greater than. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedGreater(y Int32x16, z Mask32x16) Mask32x16 - -// Greater compares for greater than. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedGreater(y Int64x2, z Mask64x2) Mask64x2 - -// Greater compares for greater than. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedGreater(y Int64x4, z Mask64x4) Mask64x4 - -// Greater compares for greater than. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedGreater(y Int64x8, z Mask64x8) Mask64x8 - -// Greater compares for greater than. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGreater(y Uint8x16, z Mask8x16) Mask8x16 - -// Greater compares for greater than. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGreater(y Uint8x32, z Mask8x32) Mask8x32 - -// Greater compares for greater than. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGreater(y Uint8x64, z Mask8x64) Mask8x64 - -// Greater compares for greater than. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedGreater(y Uint16x8, z Mask16x8) Mask16x8 - -// Greater compares for greater than. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedGreater(y Uint16x16, z Mask16x16) Mask16x16 - -// Greater compares for greater than. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedGreater(y Uint16x32, z Mask16x32) Mask16x32 - -// Greater compares for greater than. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedGreater(y Uint32x4, z Mask32x4) Mask32x4 - -// Greater compares for greater than. 
-// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedGreater(y Uint32x8, z Mask32x8) Mask32x8 - -// Greater compares for greater than. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedGreater(y Uint32x16, z Mask32x16) Mask32x16 - -// Greater compares for greater than. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedGreater(y Uint64x2, z Mask64x2) Mask64x2 - -// Greater compares for greater than. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedGreater(y Uint64x4, z Mask64x4) Mask64x4 - -// Greater compares for greater than. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedGreater(y Uint64x8, z Mask64x8) Mask64x8 - -/* MaskedGreaterEqual */ - -// GreaterEqual compares for greater than or equal. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedGreaterEqual(y Float32x4, z Mask32x4) Mask32x4 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedGreaterEqual(y Float32x8, z Mask32x8) Mask32x8 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedGreaterEqual(y Float32x16, z Mask32x16) Mask32x16 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedGreaterEqual(y Float64x2, z Mask64x2) Mask64x2 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedGreaterEqual(y Float64x4, z Mask64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedGreaterEqual(y Float64x8, z Mask64x8) Mask64x8 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedGreaterEqual(y Int8x16, z Mask8x16) Mask8x16 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedGreaterEqual(y Int8x32, z Mask8x32) Mask8x32 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedGreaterEqual(y Int8x64, z Mask8x64) Mask8x64 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedGreaterEqual(y Int16x8, z Mask16x8) Mask16x8 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedGreaterEqual(y Int16x16, z Mask16x16) Mask16x16 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedGreaterEqual(y Int16x32, z Mask16x32) Mask16x32 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedGreaterEqual(y Int32x4, z Mask32x4) Mask32x4 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedGreaterEqual(y Int32x8, z Mask32x8) Mask32x8 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedGreaterEqual(y Int32x16, z Mask32x16) Mask32x16 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedGreaterEqual(y Int64x2, z Mask64x2) Mask64x2 - -// GreaterEqual compares for greater than or equal. 
-// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedGreaterEqual(y Int64x4, z Mask64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedGreaterEqual(y Int64x8, z Mask64x8) Mask64x8 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedGreaterEqual(y Uint8x16, z Mask8x16) Mask8x16 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedGreaterEqual(y Uint8x32, z Mask8x32) Mask8x32 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedGreaterEqual(y Uint8x64, z Mask8x64) Mask8x64 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedGreaterEqual(y Uint16x8, z Mask16x8) Mask16x8 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedGreaterEqual(y Uint16x16, z Mask16x16) Mask16x16 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedGreaterEqual(y Uint16x32, z Mask16x32) Mask16x32 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedGreaterEqual(y Uint32x4, z Mask32x4) Mask32x4 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedGreaterEqual(y Uint32x8, z Mask32x8) Mask32x8 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedGreaterEqual(y Uint32x16, z Mask32x16) Mask32x16 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedGreaterEqual(y Uint64x2, z Mask64x2) Mask64x2 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedGreaterEqual(y Uint64x4, z Mask64x4) Mask64x4 - -// GreaterEqual compares for greater than or equal. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedGreaterEqual(y Uint64x8, z Mask64x8) Mask64x8 - -/* MaskedIsNan */ - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedIsNan(y Float32x4, z Mask32x4) Mask32x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedIsNan(y Float32x8, z Mask32x8) Mask32x8 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedIsNan(y Float32x16, z Mask32x16) Mask32x16 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedIsNan(y Float64x2, z Mask64x2) Mask64x2 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedIsNan(y Float64x4, z Mask64x4) Mask64x4 - -// IsNan checks if elements are NaN. Use as x.IsNan(x). -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedIsNan(y Float64x8, z Mask64x8) Mask64x8 - -/* MaskedLess */ - -// Less compares for less than. 
-// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedLess(y Float32x4, z Mask32x4) Mask32x4 - -// Less compares for less than. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedLess(y Float32x8, z Mask32x8) Mask32x8 - -// Less compares for less than. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedLess(y Float32x16, z Mask32x16) Mask32x16 - -// Less compares for less than. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedLess(y Float64x2, z Mask64x2) Mask64x2 - -// Less compares for less than. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedLess(y Float64x4, z Mask64x4) Mask64x4 - -// Less compares for less than. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedLess(y Float64x8, z Mask64x8) Mask64x8 - -// Less compares for less than. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedLess(y Int8x16, z Mask8x16) Mask8x16 - -// Less compares for less than. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedLess(y Int8x32, z Mask8x32) Mask8x32 - -// Less compares for less than. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedLess(y Int8x64, z Mask8x64) Mask8x64 - -// Less compares for less than. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedLess(y Int16x8, z Mask16x8) Mask16x8 - -// Less compares for less than. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedLess(y Int16x16, z Mask16x16) Mask16x16 - -// Less compares for less than. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedLess(y Int16x32, z Mask16x32) Mask16x32 - -// Less compares for less than. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedLess(y Int32x4, z Mask32x4) Mask32x4 - -// Less compares for less than. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedLess(y Int32x8, z Mask32x8) Mask32x8 - -// Less compares for less than. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedLess(y Int32x16, z Mask32x16) Mask32x16 - -// Less compares for less than. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedLess(y Int64x2, z Mask64x2) Mask64x2 - -// Less compares for less than. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedLess(y Int64x4, z Mask64x4) Mask64x4 - -// Less compares for less than. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedLess(y Int64x8, z Mask64x8) Mask64x8 - -// Less compares for less than. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedLess(y Uint8x16, z Mask8x16) Mask8x16 - -// Less compares for less than. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedLess(y Uint8x32, z Mask8x32) Mask8x32 - -// Less compares for less than. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedLess(y Uint8x64, z Mask8x64) Mask8x64 - -// Less compares for less than. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedLess(y Uint16x8, z Mask16x8) Mask16x8 - -// Less compares for less than. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedLess(y Uint16x16, z Mask16x16) Mask16x16 - -// Less compares for less than. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedLess(y Uint16x32, z Mask16x32) Mask16x32 - -// Less compares for less than. 
-// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedLess(y Uint32x4, z Mask32x4) Mask32x4 - -// Less compares for less than. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedLess(y Uint32x8, z Mask32x8) Mask32x8 - -// Less compares for less than. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedLess(y Uint32x16, z Mask32x16) Mask32x16 - -// Less compares for less than. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedLess(y Uint64x2, z Mask64x2) Mask64x2 - -// Less compares for less than. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedLess(y Uint64x4, z Mask64x4) Mask64x4 - -// Less compares for less than. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedLess(y Uint64x8, z Mask64x8) Mask64x8 - -/* MaskedLessEqual */ - -// LessEqual compares for less than or equal. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedLessEqual(y Float32x4, z Mask32x4) Mask32x4 - -// LessEqual compares for less than or equal. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedLessEqual(y Float32x8, z Mask32x8) Mask32x8 - -// LessEqual compares for less than or equal. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedLessEqual(y Float32x16, z Mask32x16) Mask32x16 - -// LessEqual compares for less than or equal. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedLessEqual(y Float64x2, z Mask64x2) Mask64x2 - -// LessEqual compares for less than or equal. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedLessEqual(y Float64x4, z Mask64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedLessEqual(y Float64x8, z Mask64x8) Mask64x8 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedLessEqual(y Int8x16, z Mask8x16) Mask8x16 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedLessEqual(y Int8x32, z Mask8x32) Mask8x32 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedLessEqual(y Int8x64, z Mask8x64) Mask8x64 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedLessEqual(y Int16x8, z Mask16x8) Mask16x8 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedLessEqual(y Int16x16, z Mask16x16) Mask16x16 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedLessEqual(y Int16x32, z Mask16x32) Mask16x32 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedLessEqual(y Int32x4, z Mask32x4) Mask32x4 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedLessEqual(y Int32x8, z Mask32x8) Mask32x8 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedLessEqual(y Int32x16, z Mask32x16) Mask32x16 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedLessEqual(y Int64x2, z Mask64x2) Mask64x2 - -// LessEqual compares for less than or equal. 
-// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedLessEqual(y Int64x4, z Mask64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedLessEqual(y Int64x8, z Mask64x8) Mask64x8 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedLessEqual(y Uint8x16, z Mask8x16) Mask8x16 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedLessEqual(y Uint8x32, z Mask8x32) Mask8x32 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedLessEqual(y Uint8x64, z Mask8x64) Mask8x64 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedLessEqual(y Uint16x8, z Mask16x8) Mask16x8 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedLessEqual(y Uint16x16, z Mask16x16) Mask16x16 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedLessEqual(y Uint16x32, z Mask16x32) Mask16x32 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedLessEqual(y Uint32x4, z Mask32x4) Mask32x4 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedLessEqual(y Uint32x8, z Mask32x8) Mask32x8 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedLessEqual(y Uint32x16, z Mask32x16) Mask32x16 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedLessEqual(y Uint64x2, z Mask64x2) Mask64x2 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedLessEqual(y Uint64x4, z Mask64x4) Mask64x4 - -// LessEqual compares for less than or equal. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedLessEqual(y Uint64x8, z Mask64x8) Mask64x8 - -/* MaskedMax */ - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedMax(y Float32x4, z Mask32x4) Float32x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedMax(y Float32x8, z Mask32x8) Float32x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedMax(y Float32x16, z Mask32x16) Float32x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedMax(y Float64x2, z Mask64x2) Float64x2 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedMax(y Float64x4, z Mask64x4) Float64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VMAXPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedMax(y Float64x8, z Mask64x8) Float64x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedMax(y Int8x16, z Mask8x16) Int8x16 - -// Max computes the maximum of corresponding elements. 
-// -// Asm: VPMAXSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedMax(y Int8x32, z Mask8x32) Int8x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedMax(y Int8x64, z Mask8x64) Int8x64 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedMax(y Int16x8, z Mask16x8) Int16x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedMax(y Int16x16, z Mask16x16) Int16x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedMax(y Int16x32, z Mask16x32) Int16x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedMax(y Int32x4, z Mask32x4) Int32x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedMax(y Int32x8, z Mask32x8) Int32x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedMax(y Int32x16, z Mask32x16) Int32x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedMax(y Int64x2, z Mask64x2) Int64x2 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedMax(y Int64x4, z Mask64x4) Int64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedMax(y Int64x8, z Mask64x8) Int64x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedMax(y Uint8x16, z Mask8x16) Uint8x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedMax(y Uint8x32, z Mask8x32) Uint8x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedMax(y Uint8x64, z Mask8x64) Uint8x64 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedMax(y Uint16x8, z Mask16x8) Uint16x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedMax(y Uint16x16, z Mask16x16) Uint16x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedMax(y Uint16x32, z Mask16x32) Uint16x32 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedMax(y Uint32x4, z Mask32x4) Uint32x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedMax(y Uint32x8, z Mask32x8) Uint32x8 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedMax(y Uint32x16, z Mask32x16) Uint32x16 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedMax(y Uint64x2, z Mask64x2) Uint64x2 - -// Max computes the maximum of corresponding elements. 
-// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedMax(y Uint64x4, z Mask64x4) Uint64x4 - -// Max computes the maximum of corresponding elements. -// -// Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedMax(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedMin */ - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedMin(y Float32x4, z Mask32x4) Float32x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedMin(y Float32x8, z Mask32x8) Float32x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedMin(y Float32x16, z Mask32x16) Float32x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedMin(y Float64x2, z Mask64x2) Float64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedMin(y Float64x4, z Mask64x4) Float64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VMINPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedMin(y Float64x8, z Mask64x8) Float64x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedMin(y Int8x16, z Mask8x16) Int8x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedMin(y Int8x32, z Mask8x32) Int8x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedMin(y Int8x64, z Mask8x64) Int8x64 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedMin(y Int16x8, z Mask16x8) Int16x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedMin(y Int16x16, z Mask16x16) Int16x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedMin(y Int16x32, z Mask16x32) Int16x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedMin(y Int32x4, z Mask32x4) Int32x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedMin(y Int32x8, z Mask32x8) Int32x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedMin(y Int32x16, z Mask32x16) Int32x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedMin(y Int64x2, z Mask64x2) Int64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedMin(y Int64x4, z Mask64x4) Int64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINSQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedMin(y Int64x8, z Mask64x8) Int64x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedMin(y Uint8x16, z Mask8x16) Uint8x16 - -// Min computes the minimum of corresponding elements. 
-// -// Asm: VPMINUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedMin(y Uint8x32, z Mask8x32) Uint8x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedMin(y Uint8x64, z Mask8x64) Uint8x64 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedMin(y Uint16x8, z Mask16x8) Uint16x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedMin(y Uint16x16, z Mask16x16) Uint16x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedMin(y Uint16x32, z Mask16x32) Uint16x32 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedMin(y Uint32x4, z Mask32x4) Uint32x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedMin(y Uint32x8, z Mask32x8) Uint32x8 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedMin(y Uint32x16, z Mask32x16) Uint32x16 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedMin(y Uint64x2, z Mask64x2) Uint64x2 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedMin(y Uint64x4, z Mask64x4) Uint64x4 - -// Min computes the minimum of corresponding elements. -// -// Asm: VPMINUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedMin(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedMul */ - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedMul(y Float32x4, z Mask32x4) Float32x4 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedMul(y Float32x8, z Mask32x8) Float32x8 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedMul(y Float32x16, z Mask32x16) Float32x16 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedMul(y Float64x2, z Mask64x2) Float64x2 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedMul(y Float64x4, z Mask64x4) Float64x4 - -// Mul multiplies corresponding elements of two vectors, masked. -// -// Asm: VMULPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedMul(y Float64x8, z Mask64x8) Float64x8 - -/* MaskedMulByPowOf2 */ - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedMulByPowOf2(y Float32x4, z Mask32x4) Float32x4 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedMulByPowOf2(y Float32x8, z Mask32x8) Float32x8 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedMulByPowOf2(y Float32x16, z Mask32x16) Float32x16 - -// MulByPowOf2 multiplies elements by a power of 2. 
-// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedMulByPowOf2(y Float64x2, z Mask64x2) Float64x2 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedMulByPowOf2(y Float64x4, z Mask64x4) Float64x4 - -// MulByPowOf2 multiplies elements by a power of 2. -// -// Asm: VSCALEFPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedMulByPowOf2(y Float64x8, z Mask64x8) Float64x8 - -/* MaskedMulEvenWiden */ - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedMulEvenWiden(y Int64x2, z Mask64x2) Int64x2 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedMulEvenWiden(y Int64x4, z Mask64x4) Int64x4 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedMulEvenWiden(y Int64x8, z Mask64x8) Int64x8 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedMulEvenWiden(y Uint64x2, z Mask64x2) Uint64x2 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedMulEvenWiden(y Uint64x4, z Mask64x4) Uint64x4 - -// MulEvenWiden multiplies even-indexed elements, widening the result, masked. -// Result[i] = v1.Even[i] * v2.Even[i]. -// -// Asm: VPMULUDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedMulEvenWiden(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedMulHigh */ - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedMulHigh(y Int16x8, z Mask16x8) Int16x8 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedMulHigh(y Int16x16, z Mask16x16) Int16x16 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedMulHigh(y Int16x32, z Mask16x32) Int16x32 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedMulHigh(y Uint16x8, z Mask16x8) Uint16x8 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedMulHigh(y Uint16x16, z Mask16x16) Uint16x16 - -// MulHigh multiplies elements and stores the high part of the result, masked. -// -// Asm: VPMULHUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedMulHigh(y Uint16x32, z Mask16x32) Uint16x32 - -/* MaskedMulLow */ - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedMulLow(y Int16x8, z Mask16x8) Int16x8 - -// MulLow multiplies elements and stores the low part of the result, masked. 
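The widening and high-half multiply comments above correspond to simple scalar formulas; a sketch of both (masking not modeled):

package example

// mulHighRef mirrors VPMULHW as documented above: each lane keeps the high
// 16 bits of the widened 16x16 -> 32 bit product.
func mulHighRef(a, b []int16) []int16 {
	r := make([]int16, len(a))
	for i := range r {
		r[i] = int16((int32(a[i]) * int32(b[i])) >> 16)
	}
	return r
}

// mulEvenWidenRef mirrors "Result[i] = v1.Even[i] * v2.Even[i]": the low
// (even-indexed) 32-bit half of each 64-bit lane is sign-extended and
// multiplied into a full 64-bit result.
func mulEvenWidenRef(a, b []int64) []int64 {
	r := make([]int64, len(a))
	for i := range r {
		r[i] = int64(int32(a[i])) * int64(int32(b[i]))
	}
	return r
}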
-// -// Asm: VPMULLW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedMulLow(y Int16x16, z Mask16x16) Int16x16 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedMulLow(y Int16x32, z Mask16x32) Int16x32 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedMulLow(y Int32x4, z Mask32x4) Int32x4 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedMulLow(y Int32x8, z Mask32x8) Int32x8 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedMulLow(y Int32x16, z Mask32x16) Int32x16 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedMulLow(y Int64x2, z Mask64x2) Int64x2 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedMulLow(y Int64x4, z Mask64x4) Int64x4 - -// MulLow multiplies elements and stores the low part of the result, masked. -// -// Asm: VPMULLQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedMulLow(y Int64x8, z Mask64x8) Int64x8 - -/* MaskedNotEqual */ - -// NotEqual compares for inequality. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedNotEqual(y Float32x4, z Mask32x4) Mask32x4 - -// NotEqual compares for inequality. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedNotEqual(y Float32x8, z Mask32x8) Mask32x8 - -// NotEqual compares for inequality. -// -// Asm: VCMPPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedNotEqual(y Float32x16, z Mask32x16) Mask32x16 - -// NotEqual compares for inequality. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedNotEqual(y Float64x2, z Mask64x2) Mask64x2 - -// NotEqual compares for inequality. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedNotEqual(y Float64x4, z Mask64x4) Mask64x4 - -// NotEqual compares for inequality. -// -// Asm: VCMPPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedNotEqual(y Float64x8, z Mask64x8) Mask64x8 - -// NotEqual compares for inequality. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedNotEqual(y Int8x16, z Mask8x16) Mask8x16 - -// NotEqual compares for inequality. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedNotEqual(y Int8x32, z Mask8x32) Mask8x32 - -// NotEqual compares for inequality. -// -// Asm: VPCMPB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedNotEqual(y Int8x64, z Mask8x64) Mask8x64 - -// NotEqual compares for inequality. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedNotEqual(y Int16x8, z Mask16x8) Mask16x8 - -// NotEqual compares for inequality. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedNotEqual(y Int16x16, z Mask16x16) Mask16x16 - -// NotEqual compares for inequality. -// -// Asm: VPCMPW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedNotEqual(y Int16x32, z Mask16x32) Mask16x32 - -// NotEqual compares for inequality. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedNotEqual(y Int32x4, z Mask32x4) Mask32x4 - -// NotEqual compares for inequality. 
-// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedNotEqual(y Int32x8, z Mask32x8) Mask32x8 - -// NotEqual compares for inequality. -// -// Asm: VPCMPD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedNotEqual(y Int32x16, z Mask32x16) Mask32x16 - -// NotEqual compares for inequality. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedNotEqual(y Int64x2, z Mask64x2) Mask64x2 - -// NotEqual compares for inequality. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedNotEqual(y Int64x4, z Mask64x4) Mask64x4 - -// NotEqual compares for inequality. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedNotEqual(y Int64x8, z Mask64x8) Mask64x8 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedNotEqual(y Uint8x16, z Mask8x16) Mask8x16 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedNotEqual(y Uint8x32, z Mask8x32) Mask8x32 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedNotEqual(y Uint8x64, z Mask8x64) Mask8x64 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedNotEqual(y Uint16x8, z Mask16x8) Mask16x8 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedNotEqual(y Uint16x16, z Mask16x16) Mask16x16 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedNotEqual(y Uint16x32, z Mask16x32) Mask16x32 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedNotEqual(y Uint32x4, z Mask32x4) Mask32x4 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedNotEqual(y Uint32x8, z Mask32x8) Mask32x8 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedNotEqual(y Uint32x16, z Mask32x16) Mask32x16 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedNotEqual(y Uint64x2, z Mask64x2) Mask64x2 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedNotEqual(y Uint64x4, z Mask64x4) Mask64x4 - -// NotEqual compares for inequality. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedNotEqual(y Uint64x8, z Mask64x8) Mask64x8 - -/* MaskedOr */ - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedOr(y Int32x4, z Mask32x4) Int32x4 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedOr(y Int32x8, z Mask32x8) Int32x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedOr(y Int32x16, z Mask32x16) Int32x16 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedOr(y Int64x2, z Mask64x2) Int64x2 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedOr(y Int64x4, z Mask64x4) Int64x4 - -// Or performs a masked bitwise OR operation between two vectors. 
-// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedOr(y Int64x8, z Mask64x8) Int64x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedOr(y Uint32x4, z Mask32x4) Uint32x4 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedOr(y Uint32x8, z Mask32x8) Uint32x8 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedOr(y Uint32x16, z Mask32x16) Uint32x16 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedOr(y Uint64x2, z Mask64x2) Uint64x2 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedOr(y Uint64x4, z Mask64x4) Uint64x4 - -// Or performs a masked bitwise OR operation between two vectors. -// -// Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedOr(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedPairDotProd */ - -// PairDotProd multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDWD, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedPairDotProd(y Int16x8, z Mask16x8) Int32x4 - -// PairDotProd multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDWD, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedPairDotProd(y Int16x16, z Mask16x16) Int32x8 - -// PairDotProd multiplies the elements and add the pairs together, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDWD, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedPairDotProd(y Int16x32, z Mask16x32) Int32x16 - -/* MaskedPairDotProdAccumulate */ - -// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedPairDotProdAccumulate(y Int16x8, z Int16x8, u Mask32x4) Int32x4 - -// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedPairDotProdAccumulate(y Int16x16, z Int16x16, u Mask32x8) Int32x8 - -// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedPairDotProdAccumulate(y Int16x32, z Int16x32, u Mask32x16) Int32x16 - -/* MaskedPopCount */ - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedPopCount(y Mask8x16) Int8x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedPopCount(y Mask8x32) Int8x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedPopCount(y Mask8x64) Int8x64 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedPopCount(y Mask16x8) Int16x8 - -// PopCount counts the number of set bits in each element. 
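A scalar sketch of the pair dot product described above: adjacent 16-bit pairs are multiplied and summed into one 32-bit lane, halving the element count while doubling the element width; the Accumulate variant then adds each 32-bit sum into the corresponding lane of x (masking not modeled):

package example

// pairDotProdRef models the per-lane VPMADDWD computation.
func pairDotProdRef(a, b []int16) []int32 {
	r := make([]int32, len(a)/2)
	for i := range r {
		r[i] = int32(a[2*i])*int32(b[2*i]) + int32(a[2*i+1])*int32(b[2*i+1])
	}
	return r
}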
-// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedPopCount(y Mask16x16) Int16x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedPopCount(y Mask16x32) Int16x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedPopCount(y Mask32x4) Int32x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedPopCount(y Mask32x8) Int32x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedPopCount(y Mask32x16) Int32x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedPopCount(y Mask64x2) Int64x2 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedPopCount(y Mask64x4) Int64x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedPopCount(y Mask64x8) Int64x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedPopCount(y Mask8x16) Uint8x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedPopCount(y Mask8x32) Uint8x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedPopCount(y Mask8x64) Uint8x64 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedPopCount(y Mask16x8) Uint16x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedPopCount(y Mask16x16) Uint16x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedPopCount(y Mask16x32) Uint16x32 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedPopCount(y Mask32x4) Uint32x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedPopCount(y Mask32x8) Uint32x8 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedPopCount(y Mask32x16) Uint32x16 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedPopCount(y Mask64x2) Uint64x2 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedPopCount(y Mask64x4) Uint64x4 - -// PopCount counts the number of set bits in each element. -// -// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedPopCount(y Mask64x8) Uint64x8 - -/* MaskedRotateAllLeft */ - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
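A scalar sketch of the per-element population count described above, shown for the byte case with math/bits (masking not modeled):

package example

import "math/bits"

// popCountRef replaces each element with the number of set bits it contains.
func popCountRef(a []uint8) []uint8 {
	r := make([]uint8, len(a))
	for i, v := range a {
		r[i] = uint8(bits.OnesCount8(v))
	}
	return r
}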
-// -// Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedRotateAllLeft(imm uint8, y Mask32x4) Int32x4 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedRotateAllLeft(imm uint8, y Mask32x8) Int32x8 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedRotateAllLeft(imm uint8, y Mask32x16) Int32x16 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedRotateAllLeft(imm uint8, y Mask64x2) Int64x2 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedRotateAllLeft(imm uint8, y Mask64x4) Int64x4 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedRotateAllLeft(imm uint8, y Mask64x8) Int64x8 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedRotateAllLeft(imm uint8, y Mask32x4) Uint32x4 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedRotateAllLeft(imm uint8, y Mask32x8) Uint32x8 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedRotateAllLeft(imm uint8, y Mask32x16) Uint32x16 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedRotateAllLeft(imm uint8, y Mask64x2) Uint64x2 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedRotateAllLeft(imm uint8, y Mask64x4) Uint64x4 - -// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. -// -// Asm: VPROLQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedRotateAllLeft(imm uint8, y Mask64x8) Uint64x8 - -/* MaskedRotateAllRight */ - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedRotateAllRight(imm uint8, y Mask32x4) Int32x4 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedRotateAllRight(imm uint8, y Mask32x8) Int32x8 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedRotateAllRight(imm uint8, y Mask32x16) Int32x16 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedRotateAllRight(imm uint8, y Mask64x2) Int64x2 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
-// -// Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedRotateAllRight(imm uint8, y Mask64x4) Int64x4 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedRotateAllRight(imm uint8, y Mask64x8) Int64x8 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedRotateAllRight(imm uint8, y Mask32x4) Uint32x4 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedRotateAllRight(imm uint8, y Mask32x8) Uint32x8 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedRotateAllRight(imm uint8, y Mask32x16) Uint32x16 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedRotateAllRight(imm uint8, y Mask64x2) Uint64x2 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedRotateAllRight(imm uint8, y Mask64x4) Uint64x4 - -// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. -// -// Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedRotateAllRight(imm uint8, y Mask64x8) Uint64x8 - -/* MaskedRotateLeft */ - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedRotateLeft(y Int32x4, z Mask32x4) Int32x4 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedRotateLeft(y Int32x8, z Mask32x8) Int32x8 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedRotateLeft(y Int32x16, z Mask32x16) Int32x16 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedRotateLeft(y Int64x2, z Mask64x2) Int64x2 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedRotateLeft(y Int64x4, z Mask64x4) Int64x4 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedRotateLeft(y Int64x8, z Mask64x8) Int64x8 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedRotateLeft(y Uint32x4, z Mask32x4) Uint32x4 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. 
-// -// Asm: VPROLVD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedRotateLeft(y Uint32x8, z Mask32x8) Uint32x8 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedRotateLeft(y Uint32x16, z Mask32x16) Uint32x16 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedRotateLeft(y Uint64x2, z Mask64x2) Uint64x2 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedRotateLeft(y Uint64x4, z Mask64x4) Uint64x4 - -// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. -// -// Asm: VPROLVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedRotateLeft(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedRotateRight */ - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedRotateRight(y Int32x4, z Mask32x4) Int32x4 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedRotateRight(y Int32x8, z Mask32x8) Int32x8 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedRotateRight(y Int32x16, z Mask32x16) Int32x16 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedRotateRight(y Int64x2, z Mask64x2) Int64x2 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedRotateRight(y Int64x4, z Mask64x4) Int64x4 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedRotateRight(y Int64x8, z Mask64x8) Int64x8 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedRotateRight(y Uint32x4, z Mask32x4) Uint32x4 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedRotateRight(y Uint32x8, z Mask32x8) Uint32x8 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedRotateRight(y Uint32x16, z Mask32x16) Uint32x16 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedRotateRight(y Uint64x2, z Mask64x2) Uint64x2 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
-// -// Asm: VPRORVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedRotateRight(y Uint64x4, z Mask64x4) Uint64x4 - -// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. -// -// Asm: VPRORVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedRotateRight(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedRoundWithPrecision */ - -// RoundWithPrecision rounds elements with specified precision. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedRoundWithPrecision(imm uint8, y Mask32x4) Float32x4 - -// RoundWithPrecision rounds elements with specified precision. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedRoundWithPrecision(imm uint8, y Mask32x8) Float32x8 - -// RoundWithPrecision rounds elements with specified precision. -// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedRoundWithPrecision(imm uint8, y Mask32x16) Float32x16 - -// RoundWithPrecision rounds elements with specified precision. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedRoundWithPrecision(imm uint8, y Mask64x2) Float64x2 - -// RoundWithPrecision rounds elements with specified precision. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedRoundWithPrecision(imm uint8, y Mask64x4) Float64x4 - -// RoundWithPrecision rounds elements with specified precision. -// -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedRoundWithPrecision(imm uint8, y Mask64x8) Float64x8 - -/* MaskedSaturatedAdd */ - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedSaturatedAdd(y Int8x16, z Mask8x16) Int8x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedSaturatedAdd(y Int8x32, z Mask8x32) Int8x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedSaturatedAdd(y Int8x64, z Mask8x64) Int8x64 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedSaturatedAdd(y Int16x8, z Mask16x8) Int16x8 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedSaturatedAdd(y Int16x16, z Mask16x16) Int16x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedSaturatedAdd(y Int16x32, z Mask16x32) Int16x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedSaturatedAdd(y Uint8x16, z Mask8x16) Uint8x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedSaturatedAdd(y Uint8x32, z Mask8x32) Uint8x32 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedSaturatedAdd(y Uint8x64, z Mask8x64) Uint8x64 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. 
-// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedSaturatedAdd(y Uint16x8, z Mask16x8) Uint16x8 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedSaturatedAdd(y Uint16x16, z Mask16x16) Uint16x16 - -// SaturatedAdd adds corresponding elements of two vectors with saturation. -// -// Asm: VPADDSW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedSaturatedAdd(y Uint16x32, z Mask16x32) Uint16x32 - -/* MaskedSaturatedPairDotProdAccumulate */ - -// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedSaturatedPairDotProdAccumulate(y Int16x8, z Int16x8, u Mask32x4) Int32x4 - -// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedSaturatedPairDotProdAccumulate(y Int16x16, z Int16x16, u Mask32x8) Int32x8 - -// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. -// -// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedSaturatedPairDotProdAccumulate(y Int16x32, z Int16x32, u Mask32x16) Int32x16 - -/* MaskedSaturatedSub */ - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedSaturatedSub(y Int8x16, z Mask8x16) Int8x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedSaturatedSub(y Int8x32, z Mask8x32) Int8x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedSaturatedSub(y Int8x64, z Mask8x64) Int8x64 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedSaturatedSub(y Int16x8, z Mask16x8) Int16x8 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedSaturatedSub(y Int16x16, z Mask16x16) Int16x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedSaturatedSub(y Int16x32, z Mask16x32) Int16x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedSaturatedSub(y Uint8x16, z Mask8x16) Uint8x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedSaturatedSub(y Uint8x32, z Mask8x32) Uint8x32 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedSaturatedSub(y Uint16x8, z Mask16x8) Uint16x8 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. 
-// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedSaturatedSub(y Uint16x16, z Mask16x16) Uint16x16 - -// SaturatedSub subtracts corresponding elements of two vectors with saturation. -// -// Asm: VPSUBSW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedSaturatedSub(y Uint16x32, z Mask16x32) Uint16x32 - -/* MaskedSaturatedUnsignedSignedPairDotProd */ - -// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x16, z Mask16x8) Int16x8 - -// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x32, z Mask16x16) Int16x16 - -// SaturatedPairDotProd multiplies the elements and adds the pairs together with saturation, -// yielding a vector of half as many elements with twice the input element size. -// -// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedSaturatedUnsignedSignedPairDotProd(y Int8x64, z Mask16x32) Int16x32 - -/* MaskedSaturatedUnsignedSignedQuadDotProdAccumulate */ - -// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Int32x4 - -// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Int32x8 - -// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Int32x16 - -// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4 - -// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8 - -// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. -// -// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16 - -/* MaskedShiftAllLeft */ - -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
-// -// Asm: VPSLLQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftAllLeft(y uint64, z Mask64x2) Int64x2 - -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// Asm: VPSLLQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftAllLeft(y uint64, z Mask64x4) Int64x4 - -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// Asm: VPSLLQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftAllLeft(y uint64, z Mask64x8) Int64x8 - -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// Asm: VPSLLQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftAllLeft(y uint64, z Mask64x2) Uint64x2 - -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// Asm: VPSLLQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftAllLeft(y uint64, z Mask64x4) Uint64x4 - -// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. -// -// Asm: VPSLLQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftAllLeft(y uint64, z Mask64x8) Uint64x8 - -/* MaskedShiftAllLeftAndFillUpperFrom */ - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x8, z Mask16x8) Int16x8 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x16, z Mask16x16) Int16x16 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int16x32, z Mask16x32) Int16x32 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x4, z Mask32x4) Int32x4 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x8, z Mask32x8) Int32x8 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
-// -// Asm: VPSHLDD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int32x16, z Mask32x16) Int32x16 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x2, z Mask64x2) Int64x2 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x4, z Mask64x4) Int64x4 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Int64x8, z Mask64x8) Int64x8 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x8, z Mask16x8) Uint16x8 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x16, z Mask16x16) Uint16x16 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint16x32, z Mask16x32) Uint16x32 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x4, z Mask32x4) Uint32x4 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x8, z Mask32x8) Uint32x8 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
-// -// Asm: VPSHLDD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint32x16, z Mask32x16) Uint32x16 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x2, z Mask64x2) Uint64x2 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x4, z Mask64x4) Uint64x4 - -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedShiftAllRight */ - -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftAllRight(y uint64, z Mask64x2) Int64x2 - -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftAllRight(y uint64, z Mask64x4) Int64x4 - -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftAllRight(y uint64, z Mask64x8) Int64x8 - -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftAllRight(y uint64, z Mask64x2) Uint64x2 - -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftAllRight(y uint64, z Mask64x4) Uint64x4 - -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. -// -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftAllRight(y uint64, z Mask64x8) Uint64x8 - -/* MaskedShiftAllRightAndFillUpperFrom */ - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x8, z Mask16x8) Int16x8 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
-// -// Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x16, z Mask16x16) Int16x16 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int16x32, z Mask16x32) Int16x32 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x4, z Mask32x4) Int32x4 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x8, z Mask32x8) Int32x8 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int32x16, z Mask32x16) Int32x16 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x2, z Mask64x2) Int64x2 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x4, z Mask64x4) Int64x4 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Int64x8, z Mask64x8) Int64x8 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x8, z Mask16x8) Uint16x8 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
-// -// Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x16, z Mask16x16) Uint16x16 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x32, z Mask16x32) Uint16x32 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x4, z Mask32x4) Uint32x4 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x8, z Mask32x8) Uint32x8 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x16, z Mask32x16) Uint32x16 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x2, z Mask64x2) Uint64x2 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x4, z Mask64x4) Uint64x4 - -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedShiftAllRightSignExtended */ - -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftAllRightSignExtended(y uint64, z Mask64x2) Int64x2 - -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftAllRightSignExtended(y uint64, z Mask64x4) Int64x4 - -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. 
Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftAllRightSignExtended(y uint64, z Mask64x8) Int64x8 - -/* MaskedShiftLeft */ - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedShiftLeft(y Int16x8, z Mask16x8) Int16x8 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedShiftLeft(y Int16x16, z Mask16x16) Int16x16 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedShiftLeft(y Int16x32, z Mask16x32) Int16x32 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedShiftLeft(y Int32x4, z Mask32x4) Int32x4 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedShiftLeft(y Int32x8, z Mask32x8) Int32x8 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedShiftLeft(y Int32x16, z Mask32x16) Int32x16 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftLeft(y Int64x2, z Mask64x2) Int64x2 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftLeft(y Int64x4, z Mask64x4) Int64x4 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftLeft(y Int64x8, z Mask64x8) Int64x8 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedShiftLeft(y Uint16x8, z Mask16x8) Uint16x8 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedShiftLeft(y Uint16x16, z Mask16x16) Uint16x16 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedShiftLeft(y Uint16x32, z Mask16x32) Uint16x32 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
-// -// Asm: VPSLLVD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedShiftLeft(y Uint32x4, z Mask32x4) Uint32x4 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedShiftLeft(y Uint32x8, z Mask32x8) Uint32x8 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedShiftLeft(y Uint32x16, z Mask32x16) Uint32x16 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftLeft(y Uint64x2, z Mask64x2) Uint64x2 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftLeft(y Uint64x4, z Mask64x4) Uint64x4 - -// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. -// -// Asm: VPSLLVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftLeft(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedShiftLeftAndFillUpperFrom */ - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedShiftLeftAndFillUpperFrom(y Int16x8, z Int16x8, u Mask16x8) Int16x8 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedShiftLeftAndFillUpperFrom(y Int16x16, z Int16x16, u Mask16x16) Int16x16 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedShiftLeftAndFillUpperFrom(y Int16x32, z Int16x32, u Mask16x32) Int16x32 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedShiftLeftAndFillUpperFrom(y Int32x4, z Int32x4, u Mask32x4) Int32x4 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-// -// Asm: VPSHLDVD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedShiftLeftAndFillUpperFrom(y Int32x8, z Int32x8, u Mask32x8) Int32x8 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedShiftLeftAndFillUpperFrom(y Int32x16, z Int32x16, u Mask32x16) Int32x16 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftLeftAndFillUpperFrom(y Int64x2, z Int64x2, u Mask64x2) Int64x2 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftLeftAndFillUpperFrom(y Int64x4, z Int64x4, u Mask64x4) Int64x4 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftLeftAndFillUpperFrom(y Int64x8, z Int64x8, u Mask64x8) Int64x8 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedShiftLeftAndFillUpperFrom(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedShiftLeftAndFillUpperFrom(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedShiftLeftAndFillUpperFrom(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
-// -// Asm: VPSHLDVD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedShiftLeftAndFillUpperFrom(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedShiftLeftAndFillUpperFrom(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedShiftLeftAndFillUpperFrom(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftLeftAndFillUpperFrom(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 - -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. -// -// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 - -/* MaskedShiftRight */ - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedShiftRight(y Int16x8, z Mask16x8) Int16x8 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedShiftRight(y Int16x16, z Mask16x16) Int16x16 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedShiftRight(y Int16x32, z Mask16x32) Int16x32 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedShiftRight(y Int32x4, z Mask32x4) Int32x4 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
-// -// Asm: VPSRLVD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedShiftRight(y Int32x8, z Mask32x8) Int32x8 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedShiftRight(y Int32x16, z Mask32x16) Int32x16 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftRight(y Int64x2, z Mask64x2) Int64x2 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftRight(y Int64x4, z Mask64x4) Int64x4 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftRight(y Int64x8, z Mask64x8) Int64x8 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedShiftRight(y Uint16x8, z Mask16x8) Uint16x8 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedShiftRight(y Uint16x16, z Mask16x16) Uint16x16 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedShiftRight(y Uint16x32, z Mask16x32) Uint16x32 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedShiftRight(y Uint32x4, z Mask32x4) Uint32x4 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedShiftRight(y Uint32x8, z Mask32x8) Uint32x8 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedShiftRight(y Uint32x16, z Mask32x16) Uint32x16 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftRight(y Uint64x2, z Mask64x2) Uint64x2 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. -// -// Asm: VPSRLVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftRight(y Uint64x4, z Mask64x4) Uint64x4 - -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
-// -// Asm: VPSRLVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftRight(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedShiftRightAndFillUpperFrom */ - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedShiftRightAndFillUpperFrom(y Int16x8, z Int16x8, u Mask16x8) Int16x8 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedShiftRightAndFillUpperFrom(y Int16x16, z Int16x16, u Mask16x16) Int16x16 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedShiftRightAndFillUpperFrom(y Int16x32, z Int16x32, u Mask16x32) Int16x32 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedShiftRightAndFillUpperFrom(y Int32x4, z Int32x4, u Mask32x4) Int32x4 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedShiftRightAndFillUpperFrom(y Int32x8, z Int32x8, u Mask32x8) Int32x8 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedShiftRightAndFillUpperFrom(y Int32x16, z Int32x16, u Mask32x16) Int32x16 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftRightAndFillUpperFrom(y Int64x2, z Int64x2, u Mask64x2) Int64x2 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
-// -// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftRightAndFillUpperFrom(y Int64x4, z Int64x4, u Mask64x4) Int64x4 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftRightAndFillUpperFrom(y Int64x8, z Int64x8, u Mask64x8) Int64x8 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedShiftRightAndFillUpperFrom(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedShiftRightAndFillUpperFrom(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedShiftRightAndFillUpperFrom(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedShiftRightAndFillUpperFrom(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedShiftRightAndFillUpperFrom(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedShiftRightAndFillUpperFrom(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
-// -// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftRightAndFillUpperFrom(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftRightAndFillUpperFrom(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 - -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. -// -// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 - -/* MaskedShiftRightSignExtended */ - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedShiftRightSignExtended(y Int16x8, z Mask16x8) Int16x8 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedShiftRightSignExtended(y Int16x16, z Mask16x16) Int16x16 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedShiftRightSignExtended(y Int16x32, z Mask16x32) Int16x32 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedShiftRightSignExtended(y Int32x4, z Mask32x4) Int32x4 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedShiftRightSignExtended(y Int32x8, z Mask32x8) Int32x8 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedShiftRightSignExtended(y Int32x16, z Mask32x16) Int32x16 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedShiftRightSignExtended(y Int64x2, z Mask64x2) Int64x2 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedShiftRightSignExtended(y Int64x4, z Mask64x4) Int64x4 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedShiftRightSignExtended(y Int64x8, z Mask64x8) Int64x8 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedShiftRightSignExtended(y Uint16x8, z Mask16x8) Uint16x8 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedShiftRightSignExtended(y Uint16x16, z Mask16x16) Uint16x16 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedShiftRightSignExtended(y Uint16x32, z Mask16x32) Uint16x32 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedShiftRightSignExtended(y Uint32x4, z Mask32x4) Uint32x4 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedShiftRightSignExtended(y Uint32x8, z Mask32x8) Uint32x8 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedShiftRightSignExtended(y Uint32x16, z Mask32x16) Uint32x16 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedShiftRightSignExtended(y Uint64x2, z Mask64x2) Uint64x2 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedShiftRightSignExtended(y Uint64x4, z Mask64x4) Uint64x4 - -// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. -// -// Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedShiftRightSignExtended(y Uint64x8, z Mask64x8) Uint64x8 - -/* MaskedSqrt */ - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedSqrt(y Mask32x4) Float32x4 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedSqrt(y Mask32x8) Float32x8 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedSqrt(y Mask32x16) Float32x16 - -// Sqrt computes the square root of each element. 
-// -// Asm: VSQRTPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedSqrt(y Mask64x2) Float64x2 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedSqrt(y Mask64x4) Float64x4 - -// Sqrt computes the square root of each element. -// -// Asm: VSQRTPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedSqrt(y Mask64x8) Float64x8 - -/* MaskedSub */ - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VSUBPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedSub(y Float32x4, z Mask32x4) Float32x4 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VSUBPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedSub(y Float32x8, z Mask32x8) Float32x8 - -// Sub subtracts corresponding elements of two vectors. -// -// Asm: VSUBPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedSub(y Float32x16, z Mask32x16) Float32x16 - -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VSUBPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedSub(y Float64x2, z Mask64x2) Float64x2 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) LessMasked(y Int8x16, z Mask8x16) Mask8x16 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VSUBPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedSub(y Float64x4, z Mask64x4) Float64x4 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) LessMasked(y Int8x32, z Mask8x32) Mask8x32 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VSUBPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedSub(y Float64x8, z Mask64x8) Float64x8 +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) LessMasked(y Int8x64, z Mask8x64) Mask8x64 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Int8x16) MaskedSub(y Int8x16, z Mask8x16) Int8x16 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) LessMasked(y Int16x8, z Mask16x8) Mask16x8 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Int8x32) MaskedSub(y Int8x32, z Mask8x32) Int8x32 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) LessMasked(y Int16x16, z Mask16x16) Mask16x16 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Int8x64) MaskedSub(y Int8x64, z Mask8x64) Int8x64 +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) LessMasked(y Int16x32, z Mask16x32) Mask16x32 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Int16x8) MaskedSub(y Int16x8, z Mask16x8) Int16x8 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) LessMasked(y Int32x4, z Mask32x4) Mask32x4 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Int16x16) MaskedSub(y Int16x16, z Mask16x16) Int16x16 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) LessMasked(y Int32x8, z Mask32x8) Mask32x8 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. 
// -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Int16x32) MaskedSub(y Int16x32, z Mask16x32) Int16x32 +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) LessMasked(y Int32x16, z Mask32x16) Mask32x16 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedSub(y Int32x4, z Mask32x4) Int32x4 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) LessMasked(y Int64x2, z Mask64x2) Mask64x2 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedSub(y Int32x8, z Mask32x8) Int32x8 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) LessMasked(y Int64x4, z Mask64x4) Mask64x4 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedSub(y Int32x16, z Mask32x16) Int32x16 +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) LessMasked(y Int64x8, z Mask64x8) Mask64x8 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedSub(y Int64x2, z Mask64x2) Int64x2 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) LessMasked(y Uint8x16, z Mask8x16) Mask8x16 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedSub(y Int64x4, z Mask64x4) Int64x4 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) LessMasked(y Uint8x32, z Mask8x32) Mask8x32 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedSub(y Int64x8, z Mask64x8) Int64x8 +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) LessMasked(y Uint8x64, z Mask8x64) Mask8x64 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Uint8x16) MaskedSub(y Uint8x16, z Mask8x16) Uint8x16 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) LessMasked(y Uint16x8, z Mask16x8) Mask16x8 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Uint8x32) MaskedSub(y Uint8x32, z Mask8x32) Uint8x32 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) LessMasked(y Uint16x16, z Mask16x16) Mask16x16 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBB, CPU Feature: AVX512EVEX -func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64 +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) LessMasked(y Uint16x32, z Mask16x32) Mask16x32 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Uint16x8) MaskedSub(y Uint16x8, z Mask16x8) Uint16x8 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) LessMasked(y Uint32x4, z Mask32x4) Mask32x4 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. 
// -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Uint16x16) MaskedSub(y Uint16x16, z Mask16x16) Uint16x16 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) LessMasked(y Uint32x8, z Mask32x8) Mask32x8 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBW, CPU Feature: AVX512EVEX -func (x Uint16x32) MaskedSub(y Uint16x32, z Mask16x32) Uint16x32 +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) LessMasked(y Uint32x16, z Mask32x16) Mask32x16 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedSub(y Uint32x4, z Mask32x4) Uint32x4 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) LessMasked(y Uint64x2, z Mask64x2) Mask64x2 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedSub(y Uint32x8, z Mask32x8) Uint32x8 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) LessMasked(y Uint64x4, z Mask64x4) Mask64x4 -// Sub subtracts corresponding elements of two vectors. +// Less compares for less than. // -// Asm: VPSUBD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedSub(y Uint32x16, z Mask32x16) Uint32x16 +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) LessMasked(y Uint64x8, z Mask64x8) Mask64x8 -// Sub subtracts corresponding elements of two vectors. +/* Max */ + +// Max computes the maximum of corresponding elements. // -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedSub(y Uint64x2, z Mask64x2) Uint64x2 +// Asm: VMAXPS, CPU Feature: AVX +func (x Float32x4) Max(y Float32x4) Float32x4 -// Sub subtracts corresponding elements of two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedSub(y Uint64x4, z Mask64x4) Uint64x4 +// Asm: VMAXPS, CPU Feature: AVX +func (x Float32x8) Max(y Float32x8) Float32x8 -// Sub subtracts corresponding elements of two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPSUBQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedSub(y Uint64x8, z Mask64x8) Uint64x8 +// Asm: VMAXPS, CPU Feature: AVX512EVEX +func (x Float32x16) Max(y Float32x16) Float32x16 -/* MaskedTruncWithPrecision */ +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX +func (x Float64x2) Max(y Float64x2) Float64x2 -// TruncWithPrecision truncates elements with specified precision. +// Max computes the maximum of corresponding elements. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x4) MaskedTruncWithPrecision(imm uint8, y Mask32x4) Float32x4 +// Asm: VMAXPD, CPU Feature: AVX +func (x Float64x4) Max(y Float64x4) Float64x4 -// TruncWithPrecision truncates elements with specified precision. +// Max computes the maximum of corresponding elements. // -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x8) MaskedTruncWithPrecision(imm uint8, y Mask32x8) Float32x8 +// Asm: VMAXPD, CPU Feature: AVX512EVEX +func (x Float64x8) Max(y Float64x8) Float64x8 -// TruncWithPrecision truncates elements with specified precision. +// Max computes the maximum of corresponding elements. 
// -// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX -func (x Float32x16) MaskedTruncWithPrecision(imm uint8, y Mask32x16) Float32x16 +// Asm: VPMAXSB, CPU Feature: AVX +func (x Int8x16) Max(y Int8x16) Int8x16 -// TruncWithPrecision truncates elements with specified precision. +// Max computes the maximum of corresponding elements. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x2) MaskedTruncWithPrecision(imm uint8, y Mask64x2) Float64x2 +// Asm: VPMAXSB, CPU Feature: AVX2 +func (x Int8x32) Max(y Int8x32) Int8x32 -// TruncWithPrecision truncates elements with specified precision. +// Max computes the maximum of corresponding elements. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x4) MaskedTruncWithPrecision(imm uint8, y Mask64x4) Float64x4 +// Asm: VPMAXSB, CPU Feature: AVX512EVEX +func (x Int8x64) Max(y Int8x64) Int8x64 -// TruncWithPrecision truncates elements with specified precision. +// Max computes the maximum of corresponding elements. // -// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX -func (x Float64x8) MaskedTruncWithPrecision(imm uint8, y Mask64x8) Float64x8 +// Asm: VPMAXSW, CPU Feature: AVX +func (x Int16x8) Max(y Int16x8) Int16x8 -/* MaskedUnsignedSignedQuadDotProdAccumulate */ +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX2 +func (x Int16x16) Max(y Int16x16) Int16x16 -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// Max computes the maximum of corresponding elements. // -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Int32x4 +// Asm: VPMAXSW, CPU Feature: AVX512EVEX +func (x Int16x32) Max(y Int16x32) Int16x32 -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// Max computes the maximum of corresponding elements. // -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Int32x8 +// Asm: VPMAXSD, CPU Feature: AVX +func (x Int32x4) Max(y Int32x4) Int32x4 -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// Max computes the maximum of corresponding elements. // -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Int32x16 +// Asm: VPMAXSD, CPU Feature: AVX2 +func (x Int32x8) Max(y Int32x8) Int32x8 -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// Max computes the maximum of corresponding elements. // -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4 +// Asm: VPMAXSD, CPU Feature: AVX512EVEX +func (x Int32x16) Max(y Int32x16) Int32x16 -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// Max computes the maximum of corresponding elements. 
// -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8 +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x2) Max(y Int64x2) Int64x2 -// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// Max computes the maximum of corresponding elements. // -// Asm: VPDPBUSD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16 +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x4) Max(y Int64x4) Int64x4 -/* MaskedXor */ +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512EVEX +func (x Int64x8) Max(y Int64x8) Int64x8 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Int32x4) MaskedXor(y Int32x4, z Mask32x4) Int32x4 +// Asm: VPMAXUB, CPU Feature: AVX +func (x Uint8x16) Max(y Uint8x16) Uint8x16 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Int32x8) MaskedXor(y Int32x8, z Mask32x8) Int32x8 +// Asm: VPMAXUB, CPU Feature: AVX2 +func (x Uint8x32) Max(y Uint8x32) Uint8x32 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Int32x16) MaskedXor(y Int32x16, z Mask32x16) Int32x16 +// Asm: VPMAXUB, CPU Feature: AVX512EVEX +func (x Uint8x64) Max(y Uint8x64) Uint8x64 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Int64x2) MaskedXor(y Int64x2, z Mask64x2) Int64x2 +// Asm: VPMAXUW, CPU Feature: AVX +func (x Uint16x8) Max(y Uint16x8) Uint16x8 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Int64x4) MaskedXor(y Int64x4, z Mask64x4) Int64x4 +// Asm: VPMAXUW, CPU Feature: AVX2 +func (x Uint16x16) Max(y Uint16x16) Uint16x16 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Int64x8) MaskedXor(y Int64x8, z Mask64x8) Int64x8 +// Asm: VPMAXUW, CPU Feature: AVX512EVEX +func (x Uint16x32) Max(y Uint16x32) Uint16x32 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Uint32x4) MaskedXor(y Uint32x4, z Mask32x4) Uint32x4 +// Asm: VPMAXUD, CPU Feature: AVX +func (x Uint32x4) Max(y Uint32x4) Uint32x4 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Uint32x8) MaskedXor(y Uint32x8, z Mask32x8) Uint32x8 +// Asm: VPMAXUD, CPU Feature: AVX2 +func (x Uint32x8) Max(y Uint32x8) Uint32x8 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. 
// -// Asm: VPXORD, CPU Feature: AVX512EVEX -func (x Uint32x16) MaskedXor(y Uint32x16, z Mask32x16) Uint32x16 +// Asm: VPMAXUD, CPU Feature: AVX512EVEX +func (x Uint32x16) Max(y Uint32x16) Uint32x16 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Uint64x2) MaskedXor(y Uint64x2, z Mask64x2) Uint64x2 +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) Max(y Uint64x2) Uint64x2 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Uint64x4) MaskedXor(y Uint64x4, z Mask64x4) Uint64x4 +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) Max(y Uint64x4) Uint64x4 -// Xor performs a masked bitwise XOR operation between two vectors. +// Max computes the maximum of corresponding elements. // -// Asm: VPXORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) MaskedXor(y Uint64x8, z Mask64x8) Uint64x8 +// Asm: VPMAXUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Max(y Uint64x8) Uint64x8 -/* Max */ +/* MaxMasked */ // Max computes the maximum of corresponding elements. // -// Asm: VMAXPS, CPU Feature: AVX -func (x Float32x4) Max(y Float32x4) Float32x4 +// Asm: VMAXPS, CPU Feature: AVX512EVEX +func (x Float32x4) MaxMasked(y Float32x4, z Mask32x4) Float32x4 // Max computes the maximum of corresponding elements. // -// Asm: VMAXPS, CPU Feature: AVX -func (x Float32x8) Max(y Float32x8) Float32x8 +// Asm: VMAXPS, CPU Feature: AVX512EVEX +func (x Float32x8) MaxMasked(y Float32x8, z Mask32x8) Float32x8 // Max computes the maximum of corresponding elements. // // Asm: VMAXPS, CPU Feature: AVX512EVEX -func (x Float32x16) Max(y Float32x16) Float32x16 +func (x Float32x16) MaxMasked(y Float32x16, z Mask32x16) Float32x16 // Max computes the maximum of corresponding elements. // -// Asm: VMAXPD, CPU Feature: AVX -func (x Float64x2) Max(y Float64x2) Float64x2 +// Asm: VMAXPD, CPU Feature: AVX512EVEX +func (x Float64x2) MaxMasked(y Float64x2, z Mask64x2) Float64x2 // Max computes the maximum of corresponding elements. // -// Asm: VMAXPD, CPU Feature: AVX -func (x Float64x4) Max(y Float64x4) Float64x4 +// Asm: VMAXPD, CPU Feature: AVX512EVEX +func (x Float64x4) MaxMasked(y Float64x4, z Mask64x4) Float64x4 // Max computes the maximum of corresponding elements. // // Asm: VMAXPD, CPU Feature: AVX512EVEX -func (x Float64x8) Max(y Float64x8) Float64x8 +func (x Float64x8) MaxMasked(y Float64x8, z Mask64x8) Float64x8 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXSB, CPU Feature: AVX -func (x Int8x16) Max(y Int8x16) Int8x16 +// Asm: VPMAXSB, CPU Feature: AVX512EVEX +func (x Int8x16) MaxMasked(y Int8x16, z Mask8x16) Int8x16 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXSB, CPU Feature: AVX2 -func (x Int8x32) Max(y Int8x32) Int8x32 +// Asm: VPMAXSB, CPU Feature: AVX512EVEX +func (x Int8x32) MaxMasked(y Int8x32, z Mask8x32) Int8x32 // Max computes the maximum of corresponding elements. // // Asm: VPMAXSB, CPU Feature: AVX512EVEX -func (x Int8x64) Max(y Int8x64) Int8x64 +func (x Int8x64) MaxMasked(y Int8x64, z Mask8x64) Int8x64 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXSW, CPU Feature: AVX -func (x Int16x8) Max(y Int16x8) Int16x8 +// Asm: VPMAXSW, CPU Feature: AVX512EVEX +func (x Int16x8) MaxMasked(y Int16x8, z Mask16x8) Int16x8 // Max computes the maximum of corresponding elements. 
// -// Asm: VPMAXSW, CPU Feature: AVX2 -func (x Int16x16) Max(y Int16x16) Int16x16 +// Asm: VPMAXSW, CPU Feature: AVX512EVEX +func (x Int16x16) MaxMasked(y Int16x16, z Mask16x16) Int16x16 // Max computes the maximum of corresponding elements. // // Asm: VPMAXSW, CPU Feature: AVX512EVEX -func (x Int16x32) Max(y Int16x32) Int16x32 +func (x Int16x32) MaxMasked(y Int16x32, z Mask16x32) Int16x32 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXSD, CPU Feature: AVX -func (x Int32x4) Max(y Int32x4) Int32x4 +// Asm: VPMAXSD, CPU Feature: AVX512EVEX +func (x Int32x4) MaxMasked(y Int32x4, z Mask32x4) Int32x4 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXSD, CPU Feature: AVX2 -func (x Int32x8) Max(y Int32x8) Int32x8 +// Asm: VPMAXSD, CPU Feature: AVX512EVEX +func (x Int32x8) MaxMasked(y Int32x8, z Mask32x8) Int32x8 // Max computes the maximum of corresponding elements. // // Asm: VPMAXSD, CPU Feature: AVX512EVEX -func (x Int32x16) Max(y Int32x16) Int32x16 +func (x Int32x16) MaxMasked(y Int32x16, z Mask32x16) Int32x16 // Max computes the maximum of corresponding elements. // // Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x2) Max(y Int64x2) Int64x2 +func (x Int64x2) MaxMasked(y Int64x2, z Mask64x2) Int64x2 // Max computes the maximum of corresponding elements. // // Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x4) Max(y Int64x4) Int64x4 +func (x Int64x4) MaxMasked(y Int64x4, z Mask64x4) Int64x4 // Max computes the maximum of corresponding elements. // // Asm: VPMAXSQ, CPU Feature: AVX512EVEX -func (x Int64x8) Max(y Int64x8) Int64x8 +func (x Int64x8) MaxMasked(y Int64x8, z Mask64x8) Int64x8 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXUB, CPU Feature: AVX -func (x Uint8x16) Max(y Uint8x16) Uint8x16 +// Asm: VPMAXUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MaxMasked(y Uint8x16, z Mask8x16) Uint8x16 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXUB, CPU Feature: AVX2 -func (x Uint8x32) Max(y Uint8x32) Uint8x32 +// Asm: VPMAXUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MaxMasked(y Uint8x32, z Mask8x32) Uint8x32 // Max computes the maximum of corresponding elements. // // Asm: VPMAXUB, CPU Feature: AVX512EVEX -func (x Uint8x64) Max(y Uint8x64) Uint8x64 +func (x Uint8x64) MaxMasked(y Uint8x64, z Mask8x64) Uint8x64 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXUW, CPU Feature: AVX -func (x Uint16x8) Max(y Uint16x8) Uint16x8 +// Asm: VPMAXUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MaxMasked(y Uint16x8, z Mask16x8) Uint16x8 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXUW, CPU Feature: AVX2 -func (x Uint16x16) Max(y Uint16x16) Uint16x16 +// Asm: VPMAXUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MaxMasked(y Uint16x16, z Mask16x16) Uint16x16 // Max computes the maximum of corresponding elements. // // Asm: VPMAXUW, CPU Feature: AVX512EVEX -func (x Uint16x32) Max(y Uint16x32) Uint16x32 +func (x Uint16x32) MaxMasked(y Uint16x32, z Mask16x32) Uint16x32 // Max computes the maximum of corresponding elements. // -// Asm: VPMAXUD, CPU Feature: AVX -func (x Uint32x4) Max(y Uint32x4) Uint32x4 +// Asm: VPMAXUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MaxMasked(y Uint32x4, z Mask32x4) Uint32x4 // Max computes the maximum of corresponding elements. 
// -// Asm: VPMAXUD, CPU Feature: AVX2 -func (x Uint32x8) Max(y Uint32x8) Uint32x8 +// Asm: VPMAXUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MaxMasked(y Uint32x8, z Mask32x8) Uint32x8 // Max computes the maximum of corresponding elements. // // Asm: VPMAXUD, CPU Feature: AVX512EVEX -func (x Uint32x16) Max(y Uint32x16) Uint32x16 +func (x Uint32x16) MaxMasked(y Uint32x16, z Mask32x16) Uint32x16 // Max computes the maximum of corresponding elements. // // Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) Max(y Uint64x2) Uint64x2 +func (x Uint64x2) MaxMasked(y Uint64x2, z Mask64x2) Uint64x2 // Max computes the maximum of corresponding elements. // // Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) Max(y Uint64x4) Uint64x4 +func (x Uint64x4) MaxMasked(y Uint64x4, z Mask64x4) Uint64x4 // Max computes the maximum of corresponding elements. // // Asm: VPMAXUQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Max(y Uint64x8) Uint64x8 +func (x Uint64x8) MaxMasked(y Uint64x8, z Mask64x8) Uint64x8 /* Min */ @@ -6235,6 +3949,158 @@ func (x Uint64x4) Min(y Uint64x4) Uint64x4 // Asm: VPMINUQ, CPU Feature: AVX512EVEX func (x Uint64x8) Min(y Uint64x8) Uint64x8 +/* MinMasked */ + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX512EVEX +func (x Float32x4) MinMasked(y Float32x4, z Mask32x4) Float32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX512EVEX +func (x Float32x8) MinMasked(y Float32x8, z Mask32x8) Float32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX512EVEX +func (x Float32x16) MinMasked(y Float32x16, z Mask32x16) Float32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX512EVEX +func (x Float64x2) MinMasked(y Float64x2, z Mask64x2) Float64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX512EVEX +func (x Float64x4) MinMasked(y Float64x4, z Mask64x4) Float64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX512EVEX +func (x Float64x8) MinMasked(y Float64x8, z Mask64x8) Float64x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX512EVEX +func (x Int8x16) MinMasked(y Int8x16, z Mask8x16) Int8x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX512EVEX +func (x Int8x32) MinMasked(y Int8x32, z Mask8x32) Int8x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX512EVEX +func (x Int8x64) MinMasked(y Int8x64, z Mask8x64) Int8x64 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX512EVEX +func (x Int16x8) MinMasked(y Int16x8, z Mask16x8) Int16x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX512EVEX +func (x Int16x16) MinMasked(y Int16x16, z Mask16x16) Int16x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX512EVEX +func (x Int16x32) MinMasked(y Int16x32, z Mask16x32) Int16x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX512EVEX +func (x Int32x4) MinMasked(y Int32x4, z Mask32x4) Int32x4 + +// Min computes the minimum of corresponding elements. 
+// +// Asm: VPMINSD, CPU Feature: AVX512EVEX +func (x Int32x8) MinMasked(y Int32x8, z Mask32x8) Int32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX512EVEX +func (x Int32x16) MinMasked(y Int32x16, z Mask32x16) Int32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512EVEX +func (x Int64x2) MinMasked(y Int64x2, z Mask64x2) Int64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512EVEX +func (x Int64x4) MinMasked(y Int64x4, z Mask64x4) Int64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512EVEX +func (x Int64x8) MinMasked(y Int64x8, z Mask64x8) Int64x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX512EVEX +func (x Uint8x16) MinMasked(y Uint8x16, z Mask8x16) Uint8x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX512EVEX +func (x Uint8x32) MinMasked(y Uint8x32, z Mask8x32) Uint8x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX512EVEX +func (x Uint8x64) MinMasked(y Uint8x64, z Mask8x64) Uint8x64 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MinMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MinMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MinMasked(y Uint16x32, z Mask16x32) Uint16x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX512EVEX +func (x Uint32x4) MinMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX512EVEX +func (x Uint32x8) MinMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX512EVEX +func (x Uint32x16) MinMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MinMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MinMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MinMasked(y Uint64x8, z Mask64x8) Uint64x8 + /* Mul */ // Mul multiplies corresponding elements of two vectors. @@ -6299,6 +4165,38 @@ func (x Float64x4) MulByPowOf2(y Float64x4) Float64x4 // Asm: VSCALEFPD, CPU Feature: AVX512EVEX func (x Float64x8) MulByPowOf2(y Float64x8) Float64x8 +/* MulByPowOf2Masked */ + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPS, CPU Feature: AVX512EVEX +func (x Float32x4) MulByPowOf2Masked(y Float32x4, z Mask32x4) Float32x4 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPS, CPU Feature: AVX512EVEX +func (x Float32x8) MulByPowOf2Masked(y Float32x8, z Mask32x8) Float32x8 + +// MulByPowOf2 multiplies elements by a power of 2. 
+// +// Asm: VSCALEFPS, CPU Feature: AVX512EVEX +func (x Float32x16) MulByPowOf2Masked(y Float32x16, z Mask32x16) Float32x16 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPD, CPU Feature: AVX512EVEX +func (x Float64x2) MulByPowOf2Masked(y Float64x2, z Mask64x2) Float64x2 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPD, CPU Feature: AVX512EVEX +func (x Float64x4) MulByPowOf2Masked(y Float64x4, z Mask64x4) Float64x4 + +// MulByPowOf2 multiplies elements by a power of 2. +// +// Asm: VSCALEFPD, CPU Feature: AVX512EVEX +func (x Float64x8) MulByPowOf2Masked(y Float64x8, z Mask64x8) Float64x8 + /* MulEvenWiden */ // MulEvenWiden multiplies even-indexed elements, widening the result. @@ -6361,6 +4259,44 @@ func (x Uint64x4) MulEvenWiden(y Uint64x4) Uint64x4 // Asm: VPMULUDQ, CPU Feature: AVX512EVEX func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8 +/* MulEvenWidenMasked */ + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULDQ, CPU Feature: AVX512EVEX +func (x Int64x2) MulEvenWidenMasked(y Int64x2, z Mask64x2) Int64x2 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULDQ, CPU Feature: AVX512EVEX +func (x Int64x4) MulEvenWidenMasked(y Int64x4, z Mask64x4) Int64x4 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULDQ, CPU Feature: AVX512EVEX +func (x Int64x8) MulEvenWidenMasked(y Int64x8, z Mask64x8) Int64x8 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULUDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) MulEvenWidenMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULUDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) MulEvenWidenMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// MulEvenWiden multiplies even-indexed elements, widening the result, masked. +// Result[i] = v1.Even[i] * v2.Even[i]. +// +// Asm: VPMULUDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) MulEvenWidenMasked(y Uint64x8, z Mask64x8) Uint64x8 + /* MulHigh */ // MulHigh multiplies elements and stores the high part of the result. @@ -6368,30 +4304,62 @@ func (x Uint64x8) MulEvenWiden(y Uint64x8) Uint64x8 // Asm: VPMULHW, CPU Feature: AVX func (x Int16x8) MulHigh(y Int16x8) Int16x8 -// MulHigh multiplies elements and stores the high part of the result. +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHW, CPU Feature: AVX2 +func (x Int16x16) MulHigh(y Int16x16) Int16x16 + +// MulHigh multiplies elements and stores the high part of the result, masked. +// +// Asm: VPMULHW, CPU Feature: AVX512EVEX +func (x Int16x32) MulHigh(y Int16x32) Int16x32 + +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHUW, CPU Feature: AVX +func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8 + +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHUW, CPU Feature: AVX2 +func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16 + +// MulHigh multiplies elements and stores the high part of the result, masked. 
+// +// Asm: VPMULHUW, CPU Feature: AVX512EVEX +func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 + +/* MulHighMasked */ + +// MulHigh multiplies elements and stores the high part of the result, masked. +// +// Asm: VPMULHW, CPU Feature: AVX512EVEX +func (x Int16x8) MulHighMasked(y Int16x8, z Mask16x8) Int16x8 + +// MulHigh multiplies elements and stores the high part of the result, masked. // -// Asm: VPMULHW, CPU Feature: AVX2 -func (x Int16x16) MulHigh(y Int16x16) Int16x16 +// Asm: VPMULHW, CPU Feature: AVX512EVEX +func (x Int16x16) MulHighMasked(y Int16x16, z Mask16x16) Int16x16 // MulHigh multiplies elements and stores the high part of the result, masked. // // Asm: VPMULHW, CPU Feature: AVX512EVEX -func (x Int16x32) MulHigh(y Int16x32) Int16x32 +func (x Int16x32) MulHighMasked(y Int16x32, z Mask16x32) Int16x32 -// MulHigh multiplies elements and stores the high part of the result. +// MulHigh multiplies elements and stores the high part of the result, masked. // -// Asm: VPMULHUW, CPU Feature: AVX -func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8 +// Asm: VPMULHUW, CPU Feature: AVX512EVEX +func (x Uint16x8) MulHighMasked(y Uint16x8, z Mask16x8) Uint16x8 -// MulHigh multiplies elements and stores the high part of the result. +// MulHigh multiplies elements and stores the high part of the result, masked. // -// Asm: VPMULHUW, CPU Feature: AVX2 -func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16 +// Asm: VPMULHUW, CPU Feature: AVX512EVEX +func (x Uint16x16) MulHighMasked(y Uint16x16, z Mask16x16) Uint16x16 // MulHigh multiplies elements and stores the high part of the result, masked. // // Asm: VPMULHUW, CPU Feature: AVX512EVEX -func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 +func (x Uint16x32) MulHighMasked(y Uint16x32, z Mask16x32) Uint16x32 /* MulLow */ @@ -6440,6 +4408,85 @@ func (x Int64x4) MulLow(y Int64x4) Int64x4 // Asm: VPMULLQ, CPU Feature: AVX512EVEX func (x Int64x8) MulLow(y Int64x8) Int64x8 +/* MulLowMasked */ + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLW, CPU Feature: AVX512EVEX +func (x Int16x8) MulLowMasked(y Int16x8, z Mask16x8) Int16x8 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLW, CPU Feature: AVX512EVEX +func (x Int16x16) MulLowMasked(y Int16x16, z Mask16x16) Int16x16 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLW, CPU Feature: AVX512EVEX +func (x Int16x32) MulLowMasked(y Int16x32, z Mask16x32) Int16x32 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLD, CPU Feature: AVX512EVEX +func (x Int32x4) MulLowMasked(y Int32x4, z Mask32x4) Int32x4 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLD, CPU Feature: AVX512EVEX +func (x Int32x8) MulLowMasked(y Int32x8, z Mask32x8) Int32x8 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLD, CPU Feature: AVX512EVEX +func (x Int32x16) MulLowMasked(y Int32x16, z Mask32x16) Int32x16 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLQ, CPU Feature: AVX512EVEX +func (x Int64x2) MulLowMasked(y Int64x2, z Mask64x2) Int64x2 + +// MulLow multiplies elements and stores the low part of the result, masked. 
+// +// Asm: VPMULLQ, CPU Feature: AVX512EVEX +func (x Int64x4) MulLowMasked(y Int64x4, z Mask64x4) Int64x4 + +// MulLow multiplies elements and stores the low part of the result, masked. +// +// Asm: VPMULLQ, CPU Feature: AVX512EVEX +func (x Int64x8) MulLowMasked(y Int64x8, z Mask64x8) Int64x8 + +/* MulMasked */ + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPS, CPU Feature: AVX512EVEX +func (x Float32x4) MulMasked(y Float32x4, z Mask32x4) Float32x4 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPS, CPU Feature: AVX512EVEX +func (x Float32x8) MulMasked(y Float32x8, z Mask32x8) Float32x8 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPS, CPU Feature: AVX512EVEX +func (x Float32x16) MulMasked(y Float32x16, z Mask32x16) Float32x16 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPD, CPU Feature: AVX512EVEX +func (x Float64x2) MulMasked(y Float64x2, z Mask64x2) Float64x2 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPD, CPU Feature: AVX512EVEX +func (x Float64x4) MulMasked(y Float64x4, z Mask64x4) Float64x4 + +// Mul multiplies corresponding elements of two vectors, masked. +// +// Asm: VMULPD, CPU Feature: AVX512EVEX +func (x Float64x8) MulMasked(y Float64x8, z Mask64x8) Float64x8 + /* NotEqual */ // NotEqual compares for inequality. @@ -6592,6 +4639,158 @@ func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 // Asm: VPCMPUQ, CPU Feature: AVX512EVEX func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 +/* NotEqualMasked */ + +// NotEqual compares for inequality. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x4) NotEqualMasked(y Float32x4, z Mask32x4) Mask32x4 + +// NotEqual compares for inequality. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x8) NotEqualMasked(y Float32x8, z Mask32x8) Mask32x8 + +// NotEqual compares for inequality. +// +// Asm: VCMPPS, CPU Feature: AVX512EVEX +func (x Float32x16) NotEqualMasked(y Float32x16, z Mask32x16) Mask32x16 + +// NotEqual compares for inequality. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x2) NotEqualMasked(y Float64x2, z Mask64x2) Mask64x2 + +// NotEqual compares for inequality. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x4) NotEqualMasked(y Float64x4, z Mask64x4) Mask64x4 + +// NotEqual compares for inequality. +// +// Asm: VCMPPD, CPU Feature: AVX512EVEX +func (x Float64x8) NotEqualMasked(y Float64x8, z Mask64x8) Mask64x8 + +// NotEqual compares for inequality. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x16) NotEqualMasked(y Int8x16, z Mask8x16) Mask8x16 + +// NotEqual compares for inequality. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x32) NotEqualMasked(y Int8x32, z Mask8x32) Mask8x32 + +// NotEqual compares for inequality. +// +// Asm: VPCMPB, CPU Feature: AVX512EVEX +func (x Int8x64) NotEqualMasked(y Int8x64, z Mask8x64) Mask8x64 + +// NotEqual compares for inequality. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x8) NotEqualMasked(y Int16x8, z Mask16x8) Mask16x8 + +// NotEqual compares for inequality. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x16) NotEqualMasked(y Int16x16, z Mask16x16) Mask16x16 + +// NotEqual compares for inequality. +// +// Asm: VPCMPW, CPU Feature: AVX512EVEX +func (x Int16x32) NotEqualMasked(y Int16x32, z Mask16x32) Mask16x32 + +// NotEqual compares for inequality. 
+// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x4) NotEqualMasked(y Int32x4, z Mask32x4) Mask32x4 + +// NotEqual compares for inequality. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x8) NotEqualMasked(y Int32x8, z Mask32x8) Mask32x8 + +// NotEqual compares for inequality. +// +// Asm: VPCMPD, CPU Feature: AVX512EVEX +func (x Int32x16) NotEqualMasked(y Int32x16, z Mask32x16) Mask32x16 + +// NotEqual compares for inequality. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x2) NotEqualMasked(y Int64x2, z Mask64x2) Mask64x2 + +// NotEqual compares for inequality. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x4) NotEqualMasked(y Int64x4, z Mask64x4) Mask64x4 + +// NotEqual compares for inequality. +// +// Asm: VPCMPQ, CPU Feature: AVX512EVEX +func (x Int64x8) NotEqualMasked(y Int64x8, z Mask64x8) Mask64x8 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x16) NotEqualMasked(y Uint8x16, z Mask8x16) Mask8x16 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x32) NotEqualMasked(y Uint8x32, z Mask8x32) Mask8x32 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUB, CPU Feature: AVX512EVEX +func (x Uint8x64) NotEqualMasked(y Uint8x64, z Mask8x64) Mask8x64 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x8) NotEqualMasked(y Uint16x8, z Mask16x8) Mask16x8 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x16) NotEqualMasked(y Uint16x16, z Mask16x16) Mask16x16 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUW, CPU Feature: AVX512EVEX +func (x Uint16x32) NotEqualMasked(y Uint16x32, z Mask16x32) Mask16x32 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x4) NotEqualMasked(y Uint32x4, z Mask32x4) Mask32x4 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x8) NotEqualMasked(y Uint32x8, z Mask32x8) Mask32x8 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUD, CPU Feature: AVX512EVEX +func (x Uint32x16) NotEqualMasked(y Uint32x16, z Mask32x16) Mask32x16 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x2) NotEqualMasked(y Uint64x2, z Mask64x2) Mask64x2 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x4) NotEqualMasked(y Uint64x4, z Mask64x4) Mask64x4 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUQ, CPU Feature: AVX512EVEX +func (x Uint64x8) NotEqualMasked(y Uint64x8, z Mask64x8) Mask64x8 + /* Or */ // Or performs a bitwise OR operation between two vectors. @@ -6659,40 +4858,102 @@ func (x Uint8x32) Or(y Uint8x32) Uint8x32 // Asm: VPOR, CPU Feature: AVX func (x Uint16x8) Or(y Uint16x8) Uint16x8 -// Or performs a bitwise OR operation between two vectors. +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Uint16x16) Or(y Uint16x16) Uint16x16 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Uint32x4) Or(y Uint32x4) Uint32x4 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Uint32x8) Or(y Uint32x8) Uint32x8 + +// Or performs a masked bitwise OR operation between two vectors. 
+// +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Uint32x16) Or(y Uint32x16) Uint32x16 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Uint64x2) Or(y Uint64x2) Uint64x2 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Uint64x4) Or(y Uint64x4) Uint64x4 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) Or(y Uint64x8) Uint64x8 + +/* OrMasked */ + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Int32x4) OrMasked(y Int32x4, z Mask32x4) Int32x4 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Int32x8) OrMasked(y Int32x8, z Mask32x8) Int32x8 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Int32x16) OrMasked(y Int32x16, z Mask32x16) Int32x16 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORQ, CPU Feature: AVX512EVEX +func (x Int64x2) OrMasked(y Int64x2, z Mask64x2) Int64x2 + +// Or performs a masked bitwise OR operation between two vectors. +// +// Asm: VPORQ, CPU Feature: AVX512EVEX +func (x Int64x4) OrMasked(y Int64x4, z Mask64x4) Int64x4 + +// Or performs a masked bitwise OR operation between two vectors. // -// Asm: VPOR, CPU Feature: AVX2 -func (x Uint16x16) Or(y Uint16x16) Uint16x16 +// Asm: VPORQ, CPU Feature: AVX512EVEX +func (x Int64x8) OrMasked(y Int64x8, z Mask64x8) Int64x8 -// Or performs a bitwise OR operation between two vectors. +// Or performs a masked bitwise OR operation between two vectors. // -// Asm: VPOR, CPU Feature: AVX -func (x Uint32x4) Or(y Uint32x4) Uint32x4 +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Uint32x4) OrMasked(y Uint32x4, z Mask32x4) Uint32x4 -// Or performs a bitwise OR operation between two vectors. +// Or performs a masked bitwise OR operation between two vectors. // -// Asm: VPOR, CPU Feature: AVX2 -func (x Uint32x8) Or(y Uint32x8) Uint32x8 +// Asm: VPORD, CPU Feature: AVX512EVEX +func (x Uint32x8) OrMasked(y Uint32x8, z Mask32x8) Uint32x8 // Or performs a masked bitwise OR operation between two vectors. // // Asm: VPORD, CPU Feature: AVX512EVEX -func (x Uint32x16) Or(y Uint32x16) Uint32x16 +func (x Uint32x16) OrMasked(y Uint32x16, z Mask32x16) Uint32x16 -// Or performs a bitwise OR operation between two vectors. +// Or performs a masked bitwise OR operation between two vectors. // -// Asm: VPOR, CPU Feature: AVX -func (x Uint64x2) Or(y Uint64x2) Uint64x2 +// Asm: VPORQ, CPU Feature: AVX512EVEX +func (x Uint64x2) OrMasked(y Uint64x2, z Mask64x2) Uint64x2 -// Or performs a bitwise OR operation between two vectors. +// Or performs a masked bitwise OR operation between two vectors. // -// Asm: VPOR, CPU Feature: AVX2 -func (x Uint64x4) Or(y Uint64x4) Uint64x4 +// Asm: VPORQ, CPU Feature: AVX512EVEX +func (x Uint64x4) OrMasked(y Uint64x4, z Mask64x4) Uint64x4 // Or performs a masked bitwise OR operation between two vectors. 
// // Asm: VPORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) Or(y Uint64x8) Uint64x8 +func (x Uint64x8) OrMasked(y Uint64x8, z Mask64x8) Uint64x8 /* PairDotProd */ @@ -6731,6 +4992,43 @@ func (x Int32x8) PairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8 // Asm: VPDPWSSD, CPU Feature: AVX512EVEX func (x Int32x16) PairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16 +/* PairDotProdAccumulateMasked */ + +// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +// +// Asm: VPDPWSSD, CPU Feature: AVX512EVEX +func (x Int32x4) PairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4 + +// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +// +// Asm: VPDPWSSD, CPU Feature: AVX512EVEX +func (x Int32x8) PairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8 + +// PairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +// +// Asm: VPDPWSSD, CPU Feature: AVX512EVEX +func (x Int32x16) PairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16 + +/* PairDotProdMasked */ + +// PairDotProd multiplies the elements and add the pairs together, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDWD, CPU Feature: AVX512EVEX +func (x Int16x8) PairDotProdMasked(y Int16x8, z Mask16x8) Int32x4 + +// PairDotProd multiplies the elements and add the pairs together, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDWD, CPU Feature: AVX512EVEX +func (x Int16x16) PairDotProdMasked(y Int16x16, z Mask16x16) Int32x8 + +// PairDotProd multiplies the elements and add the pairs together, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDWD, CPU Feature: AVX512EVEX +func (x Int16x32) PairDotProdMasked(y Int16x32, z Mask16x32) Int32x16 + /* PairwiseAdd */ // PairwiseAdd horizontally adds adjacent pairs of elements. @@ -7001,6 +5299,128 @@ func (x Uint64x4) PopCount() Uint64x4 // Asm: VPOPCNTQ, CPU Feature: AVX512EVEX func (x Uint64x8) PopCount() Uint64x8 +/* PopCountMasked */ + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Int8x16) PopCountMasked(y Mask8x16) Int8x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Int8x32) PopCountMasked(y Mask8x32) Int8x32 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Int8x64) PopCountMasked(y Mask8x64) Int8x64 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Int16x8) PopCountMasked(y Mask16x8) Int16x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Int16x16) PopCountMasked(y Mask16x16) Int16x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Int16x32) PopCountMasked(y Mask16x32) Int16x32 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Int32x4) PopCountMasked(y Mask32x4) Int32x4 + +// PopCount counts the number of set bits in each element. 
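Another illustrative sketch under the same assumptions (hypothetical helper, import "simd"), chaining the renamed OrMasked and PairDotProdAccumulateMasked methods:

func orThenDotAccum(a, b simd.Int32x8, w0, w1 simd.Int16x16, m simd.Mask32x8) simd.Int32x8 {
	acc := a.OrMasked(b, m) // bitwise OR restricted to the lanes selected by m
	// Accumulate pairwise dot products of w0 and w1 into acc, again under m.
	return acc.PairDotProdAccumulateMasked(w0, w1, m)
}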
+// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Int32x8) PopCountMasked(y Mask32x8) Int32x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Int32x16) PopCountMasked(y Mask32x16) Int32x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Int64x2) PopCountMasked(y Mask64x2) Int64x2 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Int64x4) PopCountMasked(y Mask64x4) Int64x4 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Int64x8) PopCountMasked(y Mask64x8) Int64x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Uint8x16) PopCountMasked(y Mask8x16) Uint8x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Uint8x32) PopCountMasked(y Mask8x32) Uint8x32 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512EVEX +func (x Uint8x64) PopCountMasked(y Mask8x64) Uint8x64 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Uint16x8) PopCountMasked(y Mask16x8) Uint16x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Uint16x16) PopCountMasked(y Mask16x16) Uint16x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512EVEX +func (x Uint16x32) PopCountMasked(y Mask16x32) Uint16x32 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Uint32x4) PopCountMasked(y Mask32x4) Uint32x4 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Uint32x8) PopCountMasked(y Mask32x8) Uint32x8 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512EVEX +func (x Uint32x16) PopCountMasked(y Mask32x16) Uint32x16 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Uint64x2) PopCountMasked(y Mask64x2) Uint64x2 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Uint64x4) PopCountMasked(y Mask64x4) Uint64x4 + +// PopCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512EVEX +func (x Uint64x8) PopCountMasked(y Mask64x8) Uint64x8 + /* RotateAllLeft */ // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. @@ -7063,6 +5483,68 @@ func (x Uint64x4) RotateAllLeft(imm uint8) Uint64x4 // Asm: VPROLQ, CPU Feature: AVX512EVEX func (x Uint64x8) RotateAllLeft(imm uint8) Uint64x8 +/* RotateAllLeftMasked */ + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateAllLeftMasked(imm uint8, y Mask32x4) Int32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
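A sketch for the masked population count (hypothetical helper, same assumptions):

func popCountSelected(v simd.Uint8x64, m simd.Mask8x64) simd.Uint8x64 {
	// Count set bits per byte lane, restricted to the lanes selected by m.
	return v.PopCountMasked(m)
}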
+// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateAllLeftMasked(imm uint8, y Mask32x8) Int32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateAllLeftMasked(imm uint8, y Mask32x16) Int32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateAllLeftMasked(imm uint8, y Mask64x2) Int64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateAllLeftMasked(imm uint8, y Mask64x4) Int64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateAllLeftMasked(imm uint8, y Mask64x8) Int64x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateAllLeftMasked(imm uint8, y Mask32x4) Uint32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateAllLeftMasked(imm uint8, y Mask32x8) Uint32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateAllLeftMasked(imm uint8, y Mask32x16) Uint32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateAllLeftMasked(imm uint8, y Mask64x2) Uint64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateAllLeftMasked(imm uint8, y Mask64x4) Uint64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// Asm: VPROLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateAllLeftMasked(imm uint8, y Mask64x8) Uint64x8 + /* RotateAllRight */ // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. @@ -7108,22 +5590,84 @@ func (x Uint32x8) RotateAllRight(imm uint8) Uint32x8 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORD, CPU Feature: AVX512EVEX -func (x Uint32x16) RotateAllRight(imm uint8) Uint32x16 +func (x Uint32x16) RotateAllRight(imm uint8) Uint32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateAllRight(imm uint8) Uint64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateAllRight(imm uint8) Uint64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateAllRight(imm uint8) Uint64x8 + +/* RotateAllRightMasked */ + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
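A sketch of the immediate-count rotate (hypothetical helper, same assumptions); the rotate count comes first, the mask last:

func rotateAllLeft3(v simd.Uint32x8, m simd.Mask32x8) simd.Uint32x8 {
	// Rotate every selected 32-bit lane left by a constant 3 bits.
	return v.RotateAllLeftMasked(3, m)
}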
+// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateAllRightMasked(imm uint8, y Mask32x4) Int32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateAllRightMasked(imm uint8, y Mask32x8) Int32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateAllRightMasked(imm uint8, y Mask32x16) Int32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateAllRightMasked(imm uint8, y Mask64x2) Int64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateAllRightMasked(imm uint8, y Mask64x4) Int64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateAllRightMasked(imm uint8, y Mask64x8) Int64x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateAllRightMasked(imm uint8, y Mask32x4) Uint32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateAllRightMasked(imm uint8, y Mask32x8) Uint32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// Asm: VPRORD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateAllRightMasked(imm uint8, y Mask32x16) Uint32x16 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x2) RotateAllRight(imm uint8) Uint64x2 +func (x Uint64x2) RotateAllRightMasked(imm uint8, y Mask64x2) Uint64x2 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x4) RotateAllRight(imm uint8) Uint64x4 +func (x Uint64x4) RotateAllRightMasked(imm uint8, y Mask64x4) Uint64x4 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate. // // Asm: VPRORQ, CPU Feature: AVX512EVEX -func (x Uint64x8) RotateAllRight(imm uint8) Uint64x8 +func (x Uint64x8) RotateAllRightMasked(imm uint8, y Mask64x8) Uint64x8 /* RotateLeft */ @@ -7187,6 +5731,68 @@ func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4 // Asm: VPROLVQ, CPU Feature: AVX512EVEX func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8 +/* RotateLeftMasked */ + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateLeftMasked(y Int32x4, z Mask32x4) Int32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateLeftMasked(y Int32x8, z Mask32x8) Int32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. 
+// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateLeftMasked(y Int32x16, z Mask32x16) Int32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateLeftMasked(y Int64x2, z Mask64x2) Int64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateLeftMasked(y Int64x4, z Mask64x4) Int64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateLeftMasked(y Int64x8, z Mask64x8) Int64x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateLeftMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateLeftMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateLeftMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateLeftMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateLeftMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateLeftMasked(y Uint64x8, z Mask64x8) Uint64x8 + /* RotateRight */ // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. @@ -7249,6 +5855,68 @@ func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4 // Asm: VPRORVQ, CPU Feature: AVX512EVEX func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 +/* RotateRightMasked */ + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x4) RotateRightMasked(y Int32x4, z Mask32x4) Int32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x8) RotateRightMasked(y Int32x8, z Mask32x8) Int32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Int32x16) RotateRightMasked(y Int32x16, z Mask32x16) Int32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
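For contrast with the immediate form above, a sketch of the per-lane-count rotate (hypothetical helper, same assumptions):

func rotatePerLane(v, counts simd.Uint32x8, m simd.Mask32x8) simd.Uint32x8 {
	// Each selected lane of v is rotated left by the count in the matching lane of counts.
	return v.RotateLeftMasked(counts, m)
}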
+// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x2) RotateRightMasked(y Int64x2, z Mask64x2) Int64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x4) RotateRightMasked(y Int64x4, z Mask64x4) Int64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Int64x8) RotateRightMasked(y Int64x8, z Mask64x8) Int64x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x4) RotateRightMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x8) RotateRightMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512EVEX +func (x Uint32x16) RotateRightMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) RotateRightMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) RotateRightMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) RotateRightMasked(y Uint64x8, z Mask64x8) Uint64x8 + /* Round */ // Round rounds elements to the nearest integer. @@ -7303,6 +5971,38 @@ func (x Float64x4) RoundWithPrecision(imm uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) RoundWithPrecision(imm uint8) Float64x8 +/* RoundWithPrecisionMasked */ + +// RoundWithPrecision rounds elements with specified precision. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x4) RoundWithPrecisionMasked(imm uint8, y Mask32x4) Float32x4 + +// RoundWithPrecision rounds elements with specified precision. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x8) RoundWithPrecisionMasked(imm uint8, y Mask32x8) Float32x8 + +// RoundWithPrecision rounds elements with specified precision. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) RoundWithPrecisionMasked(imm uint8, y Mask32x16) Float32x16 + +// RoundWithPrecision rounds elements with specified precision. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x2) RoundWithPrecisionMasked(imm uint8, y Mask64x2) Float64x2 + +// RoundWithPrecision rounds elements with specified precision. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x4) RoundWithPrecisionMasked(imm uint8, y Mask64x4) Float64x4 + +// RoundWithPrecision rounds elements with specified precision. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x8) RoundWithPrecisionMasked(imm uint8, y Mask64x8) Float64x8 + /* SaturatedAdd */ // SaturatedAdd adds corresponding elements of two vectors with saturation. 
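A sketch of the masked precision-controlled rounding (hypothetical helper, same assumptions; per the doc comment, the immediate selects the precision):

func roundSelected(v simd.Float64x4, m simd.Mask64x4) simd.Float64x4 {
	// Round the lanes selected by m, keeping two fractional bits of precision.
	return v.RoundWithPrecisionMasked(2, m)
}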
@@ -7365,6 +6065,68 @@ func (x Uint16x16) SaturatedAdd(y Uint16x16) Uint16x16 // Asm: VPADDSW, CPU Feature: AVX512EVEX func (x Uint16x32) SaturatedAdd(y Uint16x32) Uint16x32 +/* SaturatedAddMasked */ + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Int8x16) SaturatedAddMasked(y Int8x16, z Mask8x16) Int8x16 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Int8x32) SaturatedAddMasked(y Int8x32, z Mask8x32) Int8x32 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Int8x64) SaturatedAddMasked(y Int8x64, z Mask8x64) Int8x64 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Int16x8) SaturatedAddMasked(y Int16x8, z Mask16x8) Int16x8 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Int16x16) SaturatedAddMasked(y Int16x16, z Mask16x16) Int16x16 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Int16x32) SaturatedAddMasked(y Int16x32, z Mask16x32) Int16x32 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Uint8x16) SaturatedAddMasked(y Uint8x16, z Mask8x16) Uint8x16 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Uint8x32) SaturatedAddMasked(y Uint8x32, z Mask8x32) Uint8x32 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512EVEX +func (x Uint8x64) SaturatedAddMasked(y Uint8x64, z Mask8x64) Uint8x64 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Uint16x8) SaturatedAddMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Uint16x16) SaturatedAddMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// SaturatedAdd adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512EVEX +func (x Uint16x32) SaturatedAddMasked(y Uint16x32, z Mask16x32) Uint16x32 + /* SaturatedPairDotProdAccumulate */ // SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. @@ -7382,6 +6144,23 @@ func (x Int32x8) SaturatedPairDotProdAccumulate(y Int16x16, z Int16x16) Int32x8 // Asm: VPDPWSSDS, CPU Feature: AVX512EVEX func (x Int32x16) SaturatedPairDotProdAccumulate(y Int16x32, z Int16x32) Int32x16 +/* SaturatedPairDotProdAccumulateMasked */ + +// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +// +// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX +func (x Int32x4) SaturatedPairDotProdAccumulateMasked(y Int16x8, z Int16x8, u Mask32x4) Int32x4 + +// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. 
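A sketch of the masked saturating add (hypothetical helper, same assumptions):

func addClamped(a, b simd.Int16x16, m simd.Mask16x16) simd.Int16x16 {
	// Add the selected lanes, clamping to the int16 range instead of wrapping.
	return a.SaturatedAddMasked(b, m)
}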
+// +// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX +func (x Int32x8) SaturatedPairDotProdAccumulateMasked(y Int16x16, z Int16x16, u Mask32x8) Int32x8 + +// SaturatedPairDotProdAccumulate performs dot products on pairs of elements of y and z and accumulates the results to x. +// +// Asm: VPDPWSSDS, CPU Feature: AVX512EVEX +func (x Int32x16) SaturatedPairDotProdAccumulateMasked(y Int16x32, z Int16x32, u Mask32x16) Int32x16 + /* SaturatedPairwiseAdd */ // SaturatedPairwiseAdd horizontally adds adjacent pairs of elements with saturation. @@ -7472,25 +6251,107 @@ func (x Uint16x16) SaturatedSub(y Uint16x16) Uint16x16 // Asm: VPSUBSW, CPU Feature: AVX512EVEX func (x Uint16x32) SaturatedSub(y Uint16x32) Uint16x32 -/* SaturatedUnsignedSignedPairDotProd */ +/* SaturatedSubMasked */ + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Int8x16) SaturatedSubMasked(y Int8x16, z Mask8x16) Int8x16 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Int8x32) SaturatedSubMasked(y Int8x32, z Mask8x32) Int8x32 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Int8x64) SaturatedSubMasked(y Int8x64, z Mask8x64) Int8x64 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Int16x8) SaturatedSubMasked(y Int16x8, z Mask16x8) Int16x8 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Int16x16) SaturatedSubMasked(y Int16x16, z Mask16x16) Int16x16 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Int16x32) SaturatedSubMasked(y Int16x32, z Mask16x32) Int16x32 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Uint8x16) SaturatedSubMasked(y Uint8x16, z Mask8x16) Uint8x16 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Uint8x32) SaturatedSubMasked(y Uint8x32, z Mask8x32) Uint8x32 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512EVEX +func (x Uint8x64) SaturatedSubMasked(y Uint8x64, z Mask8x64) Uint8x64 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Uint16x8) SaturatedSubMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Uint16x16) SaturatedSubMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// SaturatedSub subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512EVEX +func (x Uint16x32) SaturatedSubMasked(y Uint16x32, z Mask16x32) Uint16x32 + +/* SaturatedUnsignedSignedPairDotProd */ + +// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, +// yielding a vector of half as many elements with twice the input element size. 
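And the matching masked saturating subtract (hypothetical helper, same assumptions):

func subClamped(a, b simd.Int8x16, m simd.Mask8x16) simd.Int8x16 {
	// Subtract the selected lanes, clamping to the int8 range instead of wrapping.
	return a.SaturatedSubMasked(b, m)
}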
+// +// Asm: VPMADDUBSW, CPU Feature: AVX +func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8 + +// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDUBSW, CPU Feature: AVX2 +func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16 + +// SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX +func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32 + +/* SaturatedUnsignedSignedPairDotProdMasked */ -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, +// SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // -// Asm: VPMADDUBSW, CPU Feature: AVX -func (x Uint8x16) SaturatedUnsignedSignedPairDotProd(y Int8x16) Int16x8 +// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX +func (x Uint8x16) SaturatedUnsignedSignedPairDotProdMasked(y Int8x16, z Mask16x8) Int16x8 -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, +// SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // -// Asm: VPMADDUBSW, CPU Feature: AVX2 -func (x Uint8x32) SaturatedUnsignedSignedPairDotProd(y Int8x32) Int16x16 +// Asm: VPMADDUBSW, CPU Feature: AVX512EVEX +func (x Uint8x32) SaturatedUnsignedSignedPairDotProdMasked(y Int8x32, z Mask16x16) Int16x16 -// SaturatedPairDotProd multiplies the elements and add the pairs together with saturation, +// SaturatedPairDotProdMasked multiplies the elements and add the pairs together with saturation, // yielding a vector of half as many elements with twice the input element size. // // Asm: VPMADDUBSW, CPU Feature: AVX512EVEX -func (x Uint8x64) SaturatedUnsignedSignedPairDotProd(y Int8x64) Int16x32 +func (x Uint8x64) SaturatedUnsignedSignedPairDotProdMasked(y Int8x64, z Mask16x32) Int16x32 /* SaturatedUnsignedSignedQuadDotProdAccumulate */ @@ -7524,6 +6385,38 @@ func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16 +/* SaturatedUnsignedSignedQuadDotProdAccumulateMasked */ + +// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX +func (x Int32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4 + +// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX +func (x Int32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8 + +// SaturatedUnsignedSignedQuadDotProdAccumulate multiplies performs dot products on groups of 4 elements of y and z and accumulates the results to x. 
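A sketch of the masked unsigned-by-signed pair dot product (hypothetical helper, same assumptions):

func usDotPairs(a simd.Uint8x32, b simd.Int8x32, m simd.Mask16x16) simd.Int16x16 {
	// Multiply unsigned bytes of a with signed bytes of b, add adjacent pairs with
	// saturation, and keep only the 16-bit result lanes selected by m.
	return a.SaturatedUnsignedSignedPairDotProdMasked(b, m)
}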
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Int32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x4) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8
+
+// SaturatedUnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x.
+//
+// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
+func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16
+
 /* Set128 */
 
 // Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
 
@@ -7800,6 +6693,148 @@ func (x Uint64x4) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x4) Uint64x4
 
 // Asm: VPSHLDQ, CPU Feature: AVX512EVEX
 func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(imm uint8, y Uint64x8) Uint64x8
 
+/* ShiftAllLeftAndFillUpperFromMasked */
+
+// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int16x8, z Mask16x8) Int16x8
+
+// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int16x16, z Mask16x16) Int16x16
+
+// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDW, CPU Feature: AVX512EVEX
+func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int16x32, z Mask16x32) Int16x32
+
+// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+//
+// Asm: VPSHLDD, CPU Feature: AVX512EVEX
+func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int32x4, z Mask32x4) Int32x4
+
+// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
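A sketch of the masked, saturating four-element dot-product accumulation (hypothetical helper, same assumptions):

func quadDotAccum(acc simd.Int32x8, u simd.Uint8x32, s simd.Int8x32, m simd.Mask32x8) simd.Int32x8 {
	// Dot products over groups of 4 bytes of u and s are accumulated into acc,
	// with saturation, in the 32-bit lanes selected by m.
	return acc.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(u, s, m)
}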
+// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int32x8, z Mask32x8) Int32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int32x16, z Mask32x16) Int32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int64x2, z Mask64x2) Int64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int64x4, z Mask64x4) Int64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Int64x8, z Mask64x8) Int64x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(imm uint8, y Uint64x8, z Mask64x8) Uint64x8 + +/* ShiftAllLeftMasked */ + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Int64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Int64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Int64x8 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllLeftMasked(y uint64, z Mask64x2) Uint64x2 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Uint64x4 + +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Uint64x8 + /* ShiftAllRight */ // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. @@ -7852,135 +6887,277 @@ func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16 // Asm: VPSRLD, CPU Feature: AVX func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
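Two sketches for the masked left shifts introduced above (hypothetical helpers, same assumptions):

func shiftConcat(x, y simd.Uint16x8, m simd.Mask16x8) simd.Uint16x8 {
	// Funnel shift: each selected lane of x moves left by 4 bits and the vacated
	// low bits are filled from the upper bits of the matching lane of y.
	return x.ShiftAllLeftAndFillUpperFromMasked(4, y, m)
}

func shiftAllLeft(v simd.Uint64x4, count uint64, m simd.Mask64x4) simd.Uint64x4 {
	// Every selected 64-bit lane is shifted left by the same run-time count;
	// emptied lower bits are zeroed.
	return v.ShiftAllLeftMasked(count, m)
}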
+// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX2 +func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX +func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8 + +/* ShiftAllRightAndFillUpperFrom */ + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x8) Int16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x16) Int16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x32) Int16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x4) Int32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x8) Int32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x16) Int32x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x2) Int64x2 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x4) Int64x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x8) Int64x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x8) Uint16x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x16) Uint16x16 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x32) Uint16x32 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x4) Uint32x4 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x8) Uint32x8 + +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // -// Asm: VPSRLD, CPU Feature: AVX2 -func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 +// Asm: VPSHRDD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x16) Uint32x16 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
// -// Asm: VPSRLQ, CPU Feature: AVX -func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2 +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x2) Uint64x2 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // -// Asm: VPSRLQ, CPU Feature: AVX2 -func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4 +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x4) Uint64x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8 +// Asm: VPSHRDQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x8) Uint64x8 -/* ShiftAllRightAndFillUpperFrom */ +/* ShiftAllRightAndFillUpperFromMasked */ // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Int16x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x8) Int16x8 +func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int16x8, z Mask16x8) Int16x8 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Int16x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x16) Int16x16 +func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int16x16, z Mask16x16) Int16x16 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Int16x32) ShiftAllRightAndFillUpperFrom(imm uint8, y Int16x32) Int16x32 +func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int16x32, z Mask16x32) Int16x32 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Int32x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x4) Int32x4 +func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int32x4, z Mask32x4) Int32x4 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
// // Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Int32x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x8) Int32x8 +func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int32x8, z Mask32x8) Int32x8 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Int32x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Int32x16) Int32x16 +func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int32x16, z Mask32x16) Int32x16 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Int64x2) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x2) Int64x2 +func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int64x2, z Mask64x2) Int64x2 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Int64x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x4) Int64x4 +func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int64x4, z Mask64x4) Int64x4 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Int64x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Int64x8) Int64x8 +func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Int64x8, z Mask64x8) Int64x8 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Uint16x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x8) Uint16x8 +func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint16x8, z Mask16x8) Uint16x8 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Uint16x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x16) Uint16x16 +func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint16x16, z Mask16x16) Uint16x16 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. 
// // Asm: VPSHRDW, CPU Feature: AVX512EVEX -func (x Uint16x32) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint16x32) Uint16x32 +func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint16x32, z Mask16x32) Uint16x32 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Uint32x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x4) Uint32x4 +func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint32x4, z Mask32x4) Uint32x4 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Uint32x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x8) Uint32x8 +func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint32x8, z Mask32x8) Uint32x8 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDD, CPU Feature: AVX512EVEX -func (x Uint32x16) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint32x16) Uint32x16 +func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint32x16, z Mask32x16) Uint32x16 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Uint64x2) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x2) Uint64x2 +func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint64x2, z Mask64x2) Uint64x2 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Uint64x4) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x4) Uint64x4 +func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint64x4, z Mask64x4) Uint64x4 // ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // Asm: VPSHRDQ, CPU Feature: AVX512EVEX -func (x Uint64x8) ShiftAllRightAndFillUpperFrom(imm uint8, y Uint64x8) Uint64x8 +func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(imm uint8, y Uint64x8, z Mask64x8) Uint64x8 + +/* ShiftAllRightMasked */ + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. 
+// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8 /* ShiftAllRightSignExtended */ @@ -8019,6 +7196,23 @@ func (x Int64x4) ShiftAllRightSignExtended(y uint64) Int64x4 // Asm: VPSRAQ, CPU Feature: AVX512EVEX func (x Int64x8) ShiftAllRightSignExtended(y uint64) Int64x8 +/* ShiftAllRightSignExtendedMasked */ + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x2) Int64x2 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x4) Int64x4 + +// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x8) Int64x8 + /* ShiftLeft */ // ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. @@ -8209,17 +7403,219 @@ func (x Uint32x16) ShiftLeftAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16 // Asm: VPSHLDVQ, CPU Feature: AVX512EVEX func (x Uint64x2) ShiftLeftAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
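(A minimal usage sketch, not part of this patch: it shows how the renamed constant-count masked shifts declared above might be called. The function name maskedConstShift, the input values, and the import path "simd" are assumptions for illustration; an AVX-512 capable CPU is also assumed.)

    package main

    import "simd" // assumed import path, matching the package's own tests

    func maskedConstShift() (simd.Int64x2, simd.Int64x2) {
        vals := []int64{-8, 16}
        sel := []int64{-1, 0} // all-ones lane is selected, zero lane is masked off
        v := simd.LoadInt64x2Slice(vals)
        m := simd.LoadInt64x2Slice(sel).AsMask64x2()
        logical := v.ShiftAllRightMasked(2, m)           // VPSRLQ: vacated bits zeroed
        arith := v.ShiftAllRightSignExtendedMasked(2, m) // VPSRAQ: vacated bits take the sign bit
        return logical, arith
    }
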
+// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8 + +/* ShiftLeftAndFillUpperFromMasked */ + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
+// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 + +// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 + +/* ShiftLeftMasked */ + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftLeftMasked(y Int16x8, z Mask16x8) Int16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftLeftMasked(y Int16x16, z Mask16x16) Int16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftLeftMasked(y Int16x32, z Mask16x32) Int16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftLeftMasked(y Int32x4, z Mask32x4) Int32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftLeftMasked(y Int32x8, z Mask32x8) Int32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftLeftMasked(y Int32x16, z Mask32x16) Int32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftLeftMasked(y Int64x2, z Mask64x2) Int64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftLeftMasked(y Int64x4, z Mask64x4) Int64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftLeftMasked(y Int64x8, z Mask64x8) Int64x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
+// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftLeftMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftLeftMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftLeftMasked(y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftLeftMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftLeftMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftLeftMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftLeftMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // -// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) ShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4 +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftLeftMasked(y Uint64x4, z Mask64x4) Uint64x4 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the -// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. // -// Asm: VPSHLDVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8 +// Asm: VPSLLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftLeftMasked(y Uint64x8, z Mask64x8) Uint64x8 /* ShiftRight */ @@ -8423,6 +7819,208 @@ func (x Uint64x4) ShiftRightAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4 // Asm: VPSHRDVQ, CPU Feature: AVX512EVEX func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8 +/* ShiftRightAndFillUpperFromMasked */ + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, u Mask16x8) Int16x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, u Mask16x16) Int16x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, u Mask16x32) Int16x32 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, u Mask32x4) Int32x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, u Mask32x8) Int32x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, u Mask32x16) Int32x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, u Mask64x2) Int64x2 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, u Mask64x4) Int64x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, u Mask64x8) Int64x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, u Mask16x8) Uint16x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, u Mask16x16) Uint16x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, u Mask16x32) Uint16x32 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, u Mask32x4) Uint32x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, u Mask32x8) Uint32x8 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, u Mask32x16) Uint32x16 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, u Mask64x2) Uint64x2 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
+// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, u Mask64x4) Uint64x4 + +// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mask64x8) Uint64x8 + +/* ShiftRightMasked */ + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftRightMasked(y Int16x8, z Mask16x8) Int16x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftRightMasked(y Int16x16, z Mask16x16) Int16x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftRightMasked(y Int16x32, z Mask16x32) Int16x32 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftRightMasked(y Int32x4, z Mask32x4) Int32x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftRightMasked(y Int32x8, z Mask32x8) Int32x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftRightMasked(y Int32x16, z Mask32x16) Int32x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftRightMasked(y Int64x2, z Mask64x2) Int64x2 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftRightMasked(y Int64x4, z Mask64x4) Int64x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftRightMasked(y Int64x8, z Mask64x8) Int64x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftRightMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. 
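(Another illustrative sketch, not part of this patch, reusing the assumed setup from the earlier sketch: a per-element masked shift where the counts come from a second vector. The function name and values are hypothetical.)

    func maskedVarShift() simd.Int32x4 {
        x := simd.LoadInt32x4Slice([]int32{8, 8, 8, 8})
        counts := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
        m := simd.LoadInt32x4Slice([]int32{-1, -1, 0, 0}).AsMask32x4()
        // Expected {4, 2, 0, 0}: lanes 0 and 1 are shifted by 1 and 2 bits,
        // masked-off lanes come back zeroed, as in the package's masked tests.
        return x.ShiftRightMasked(counts, m)
    }
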
+// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftRightMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftRightMasked(y Uint16x32, z Mask16x32) Uint16x32 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftRightMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftRightMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftRightMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftRightMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftRightMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftRightMasked(y Uint64x8, z Mask64x8) Uint64x8 + /* ShiftRightSignExtended */ // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. @@ -8483,37 +8081,129 @@ func (x Uint16x16) ShiftRightSignExtended(y Uint16x16) Uint16x16 // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAVW, CPU Feature: AVX512EVEX -func (x Uint16x32) ShiftRightSignExtended(y Uint16x32) Uint16x32 +func (x Uint16x32) ShiftRightSignExtended(y Uint16x32) Uint16x32 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX2 +func (x Uint32x4) ShiftRightSignExtended(y Uint32x4) Uint32x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX2 +func (x Uint32x8) ShiftRightSignExtended(y Uint32x8) Uint32x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
+// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftRightSignExtended(y Uint32x16) Uint32x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftRightSignExtended(y Uint64x2) Uint64x2 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftRightSignExtended(y Uint64x4) Uint64x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftRightSignExtended(y Uint64x8) Uint64x8 + +/* ShiftRightSignExtendedMasked */ + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftRightSignExtendedMasked(y Int16x8, z Mask16x8) Int16x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftRightSignExtendedMasked(y Int16x16, z Mask16x16) Int16x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftRightSignExtendedMasked(y Int16x32, z Mask16x32) Int16x32 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftRightSignExtendedMasked(y Int32x4, z Mask32x4) Int32x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftRightSignExtendedMasked(y Int32x8, z Mask32x8) Int32x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftRightSignExtendedMasked(y Int32x16, z Mask32x16) Int32x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftRightSignExtendedMasked(y Int64x2, z Mask64x2) Int64x2 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
+// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftRightSignExtendedMasked(y Int64x4, z Mask64x4) Int64x4 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftRightSignExtendedMasked(y Int64x8, z Mask64x8) Int64x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftRightSignExtendedMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftRightSignExtendedMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftRightSignExtendedMasked(y Uint16x32, z Mask16x32) Uint16x32 // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRAVD, CPU Feature: AVX2 -func (x Uint32x4) ShiftRightSignExtended(y Uint32x4) Uint32x4 +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftRightSignExtendedMasked(y Uint32x4, z Mask32x4) Uint32x4 // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRAVD, CPU Feature: AVX2 -func (x Uint32x8) ShiftRightSignExtended(y Uint32x8) Uint32x8 +// Asm: VPSRAVD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftRightSignExtendedMasked(y Uint32x8, z Mask32x8) Uint32x8 // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAVD, CPU Feature: AVX512EVEX -func (x Uint32x16) ShiftRightSignExtended(y Uint32x16) Uint32x16 +func (x Uint32x16) ShiftRightSignExtendedMasked(y Uint32x16, z Mask32x16) Uint32x16 // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Uint64x2) ShiftRightSignExtended(y Uint64x2) Uint64x2 +func (x Uint64x2) ShiftRightSignExtendedMasked(y Uint64x2, z Mask64x2) Uint64x2 // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // // Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Uint64x4) ShiftRightSignExtended(y Uint64x4) Uint64x4 +func (x Uint64x4) ShiftRightSignExtendedMasked(y Uint64x4, z Mask64x4) Uint64x4 // ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. 
// // Asm: VPSRAVQ, CPU Feature: AVX512EVEX -func (x Uint64x8) ShiftRightSignExtended(y Uint64x8) Uint64x8 +func (x Uint64x8) ShiftRightSignExtendedMasked(y Uint64x8, z Mask64x8) Uint64x8 /* Sign */ @@ -8585,6 +8275,38 @@ func (x Float64x4) Sqrt() Float64x4 // Asm: VSQRTPD, CPU Feature: AVX512EVEX func (x Float64x8) Sqrt() Float64x8 +/* SqrtMasked */ + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPS, CPU Feature: AVX512EVEX +func (x Float32x4) SqrtMasked(y Mask32x4) Float32x4 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPS, CPU Feature: AVX512EVEX +func (x Float32x8) SqrtMasked(y Mask32x8) Float32x8 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPS, CPU Feature: AVX512EVEX +func (x Float32x16) SqrtMasked(y Mask32x16) Float32x16 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPD, CPU Feature: AVX512EVEX +func (x Float64x2) SqrtMasked(y Mask64x2) Float64x2 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPD, CPU Feature: AVX512EVEX +func (x Float64x4) SqrtMasked(y Mask64x4) Float64x4 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPD, CPU Feature: AVX512EVEX +func (x Float64x8) SqrtMasked(y Mask64x8) Float64x8 + /* Sub */ // Sub subtracts corresponding elements of two vectors. @@ -8737,6 +8459,158 @@ func (x Uint64x4) Sub(y Uint64x4) Uint64x4 // Asm: VPSUBQ, CPU Feature: AVX512EVEX func (x Uint64x8) Sub(y Uint64x8) Uint64x8 +/* SubMasked */ + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPS, CPU Feature: AVX512EVEX +func (x Float32x4) SubMasked(y Float32x4, z Mask32x4) Float32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPS, CPU Feature: AVX512EVEX +func (x Float32x8) SubMasked(y Float32x8, z Mask32x8) Float32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPS, CPU Feature: AVX512EVEX +func (x Float32x16) SubMasked(y Float32x16, z Mask32x16) Float32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPD, CPU Feature: AVX512EVEX +func (x Float64x2) SubMasked(y Float64x2, z Mask64x2) Float64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPD, CPU Feature: AVX512EVEX +func (x Float64x4) SubMasked(y Float64x4, z Mask64x4) Float64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPD, CPU Feature: AVX512EVEX +func (x Float64x8) SubMasked(y Float64x8, z Mask64x8) Float64x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Int8x16) SubMasked(y Int8x16, z Mask8x16) Int8x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Int8x32) SubMasked(y Int8x32, z Mask8x32) Int8x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Int8x64) SubMasked(y Int8x64, z Mask8x64) Int8x64 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Int16x8) SubMasked(y Int16x8, z Mask16x8) Int16x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Int16x16) SubMasked(y Int16x16, z Mask16x16) Int16x16 + +// Sub subtracts corresponding elements of two vectors. 
+// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Int16x32) SubMasked(y Int16x32, z Mask16x32) Int16x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Int32x4) SubMasked(y Int32x4, z Mask32x4) Int32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Int32x8) SubMasked(y Int32x8, z Mask32x8) Int32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Int32x16) SubMasked(y Int32x16, z Mask32x16) Int32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Int64x2) SubMasked(y Int64x2, z Mask64x2) Int64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Int64x4) SubMasked(y Int64x4, z Mask64x4) Int64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Int64x8) SubMasked(y Int64x8, z Mask64x8) Int64x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Uint8x16) SubMasked(y Uint8x16, z Mask8x16) Uint8x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Uint8x32) SubMasked(y Uint8x32, z Mask8x32) Uint8x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512EVEX +func (x Uint8x64) SubMasked(y Uint8x64, z Mask8x64) Uint8x64 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Uint16x8) SubMasked(y Uint16x8, z Mask16x8) Uint16x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Uint16x16) SubMasked(y Uint16x16, z Mask16x16) Uint16x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512EVEX +func (x Uint16x32) SubMasked(y Uint16x32, z Mask16x32) Uint16x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Uint32x4) SubMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Uint32x8) SubMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512EVEX +func (x Uint32x16) SubMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Uint64x2) SubMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Uint64x4) SubMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512EVEX +func (x Uint64x8) SubMasked(y Uint64x8, z Mask64x8) Uint64x8 + /* Trunc */ // Trunc truncates elements towards zero. @@ -8791,6 +8665,38 @@ func (x Float64x4) TruncWithPrecision(imm uint8) Float64x4 // Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX func (x Float64x8) TruncWithPrecision(imm uint8) Float64x8 +/* TruncWithPrecisionMasked */ + +// TruncWithPrecision truncates elements with specified precision. 
+// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x4) TruncWithPrecisionMasked(imm uint8, y Mask32x4) Float32x4 + +// TruncWithPrecision truncates elements with specified precision. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x8) TruncWithPrecisionMasked(imm uint8, y Mask32x8) Float32x8 + +// TruncWithPrecision truncates elements with specified precision. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX +func (x Float32x16) TruncWithPrecisionMasked(imm uint8, y Mask32x16) Float32x16 + +// TruncWithPrecision truncates elements with specified precision. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x2) TruncWithPrecisionMasked(imm uint8, y Mask64x2) Float64x2 + +// TruncWithPrecision truncates elements with specified precision. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x4) TruncWithPrecisionMasked(imm uint8, y Mask64x4) Float64x4 + +// TruncWithPrecision truncates elements with specified precision. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX +func (x Float64x8) TruncWithPrecisionMasked(imm uint8, y Mask64x8) Float64x8 + /* UnsignedSignedQuadDotProdAccumulate */ // UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. @@ -8823,6 +8729,38 @@ func (x Uint32x8) UnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int8x32) Uin // Asm: VPDPBUSD, CPU Feature: AVX512EVEX func (x Uint32x16) UnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16 +/* UnsignedSignedQuadDotProdAccumulateMasked */ + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Int32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Int32x4 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Int32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Int32x8 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Int32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Int32x16 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Uint32x4) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x16, z Int8x16, u Mask32x4) Uint32x4 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Uint32x8) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x32, z Int8x32, u Mask32x8) Uint32x8 + +// UnsignedSignedQuadDotProdAccumulate performs dot products on groups of 4 elements of y and z and accumulates the results to x. +// +// Asm: VPDPBUSD, CPU Feature: AVX512EVEX +func (x Uint32x16) UnsignedSignedQuadDotProdAccumulateMasked(y Uint8x64, z Int8x64, u Mask32x16) Uint32x16 + /* Xor */ // Xor performs a bitwise XOR operation between two vectors. 
@@ -8925,6 +8863,68 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4 // Asm: VPXORQ, CPU Feature: AVX512EVEX func (x Uint64x8) Xor(y Uint64x8) Uint64x8 +/* XorMasked */ + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Int32x4) XorMasked(y Int32x4, z Mask32x4) Int32x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Int32x8) XorMasked(y Int32x8, z Mask32x8) Int32x8 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Int32x16) XorMasked(y Int32x16, z Mask32x16) Int32x16 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Int64x2) XorMasked(y Int64x2, z Mask64x2) Int64x2 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Int64x4) XorMasked(y Int64x4, z Mask64x4) Int64x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Int64x8) XorMasked(y Int64x8, z Mask64x8) Int64x8 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Uint32x4) XorMasked(y Uint32x4, z Mask32x4) Uint32x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Uint32x8) XorMasked(y Uint32x8, z Mask32x8) Uint32x8 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512EVEX +func (x Uint32x16) XorMasked(y Uint32x16, z Mask32x16) Uint32x16 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Uint64x2) XorMasked(y Uint64x2, z Mask64x2) Uint64x2 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Uint64x4) XorMasked(y Uint64x4, z Mask64x4) Uint64x4 + +// Xor performs a masked bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512EVEX +func (x Uint64x8) XorMasked(y Uint64x8, z Mask64x8) Uint64x8 + // Float64x2 converts from Float32x4 to Float64x2 func (from Float32x4) AsFloat64x2() (to Float64x2) diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go index e2324e8da5..ebe241c467 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -43,7 +43,7 @@ func TestType(t *testing.T) { return } v.z = maskT(simd.LoadInt32x4(&maskv).AsMask32x4()) - *v.y = v.y.MaskedAdd(v.x, simd.Mask32x4(v.z)) + *v.y = v.y.AddMasked(v.x, simd.Mask32x4(v.z)) got := [4]int32{} v.y.Store(&got) @@ -125,7 +125,7 @@ func TestMaskConversion(t *testing.T) { mask := y.Sub(x).AsMask32x4() v = [4]int32{5, 6, 7, 8} y = simd.LoadInt32x4(&v) - y = y.MaskedAdd(x, mask) + y = y.AddMasked(x, mask) got := [4]int32{6, 0, 8, 0} y.Store(&v) for i := range 4 { @@ -148,7 +148,7 @@ func TestMaskedAdd(t *testing.T) { t.Skip("Test requires HasAVX512, not available on this hardware") return } - testInt32x4BinaryMasked(t, []int32{1, 2, 3, 4}, []int32{5, 6, 7, 8}, []int32{-1, -1, 0, 0}, []int32{6, 8, 0, 0}, "MaskedAdd") + testInt32x4BinaryMasked(t, []int32{1, 2, 3, 4}, []int32{5, 6, 7, 8}, []int32{-1, -1, 0, 0}, []int32{6, 8, 0, 0}, "AddMasked") } // checkInt8Slices ensures that b and a are equal, to the end of b. 
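(The rename is purely mechanical at call sites, as the simd_test.go hunk above shows. A sketch in the style of TestMaskedAdd, under the same assumptions as the earlier sketches; the function name is hypothetical.)

    func maskedAddExample() [4]int32 {
        a := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
        b := simd.LoadInt32x4Slice([]int32{5, 6, 7, 8})
        m := simd.LoadInt32x4Slice([]int32{-1, -1, 0, 0}).AsMask32x4()
        sum := b.AddMasked(a, m) // previously b.MaskedAdd(a, m); yields {6, 8, 0, 0}
        var got [4]int32
        sum.Store(&got)
        return got
    }
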
diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go index d4cf7f6b74..bdbb25bfce 100644 --- a/src/simd/simd_wrapped_test.go +++ b/src/simd/simd_wrapped_test.go @@ -56,20 +56,20 @@ func testFloat32x4BinaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []in vec1 := simd.LoadFloat32x4Slice(v1) vec2 := simd.LoadInt32x4Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x4()) - case "MaskedDiv": - gotv = vec0.MaskedDiv(vec1, vec2.AsMask32x4()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x4()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x4()) - case "MaskedMul": - gotv = vec0.MaskedMul(vec1, vec2.AsMask32x4()) - case "MaskedMulByPowOf2": - gotv = vec0.MaskedMulByPowOf2(vec1, vec2.AsMask32x4()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x4()) + case "DivMasked": + gotv = vec0.DivMasked(vec1, vec2.AsMask32x4()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x4()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x4()) + case "MulByPowOf2Masked": + gotv = vec0.MulByPowOf2Masked(vec1, vec2.AsMask32x4()) + case "MulMasked": + gotv = vec0.MulMasked(vec1, vec2.AsMask32x4()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x4()) default: t.Errorf("Unknown method: Float32x4.%s", which) @@ -123,20 +123,20 @@ func testFloat32x4MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []i vec1 := simd.LoadFloat32x4Slice(v1) vec2 := simd.LoadInt32x4Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedIsNan": - gotv = vec0.MaskedIsNan(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x4()).AsInt32x4() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "IsNanMasked": + gotv = vec0.IsNanMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() default: t.Errorf("Unknown method: Float32x4.%s", which) @@ -184,12 +184,12 @@ func testFloat32x4TernaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []f vec2 := simd.LoadFloat32x4Slice(v2) vec3 := simd.LoadInt32x4Slice(v3) switch which { - case "MaskedFusedMultiplyAdd": - gotv = vec0.MaskedFusedMultiplyAdd(vec1, vec2, vec3.AsMask32x4()) - case "MaskedFusedMultiplyAddSub": - gotv = vec0.MaskedFusedMultiplyAddSub(vec1, vec2, vec3.AsMask32x4()) - case "MaskedFusedMultiplySubAdd": - gotv = vec0.MaskedFusedMultiplySubAdd(vec1, vec2, vec3.AsMask32x4()) + case "FusedMultiplyAddMasked": + gotv = vec0.FusedMultiplyAddMasked(vec1, vec2, vec3.AsMask32x4()) + case 
"FusedMultiplyAddSubMasked": + gotv = vec0.FusedMultiplyAddSubMasked(vec1, vec2, vec3.AsMask32x4()) + case "FusedMultiplySubAddMasked": + gotv = vec0.FusedMultiplySubAddMasked(vec1, vec2, vec3.AsMask32x4()) default: t.Errorf("Unknown method: Float32x4.%s", which) @@ -241,12 +241,12 @@ func testFloat32x4UnaryMasked(t *testing.T, v0 []float32, v1 []int32, want []flo vec0 := simd.LoadFloat32x4Slice(v0) vec1 := simd.LoadInt32x4Slice(v1) switch which { - case "MaskedApproximateReciprocal": - gotv = vec0.MaskedApproximateReciprocal(vec1.AsMask32x4()) - case "MaskedApproximateReciprocalOfSqrt": - gotv = vec0.MaskedApproximateReciprocalOfSqrt(vec1.AsMask32x4()) - case "MaskedSqrt": - gotv = vec0.MaskedSqrt(vec1.AsMask32x4()) + case "ApproximateReciprocalMasked": + gotv = vec0.ApproximateReciprocalMasked(vec1.AsMask32x4()) + case "ApproximateReciprocalOfSqrtMasked": + gotv = vec0.ApproximateReciprocalOfSqrtMasked(vec1.AsMask32x4()) + case "SqrtMasked": + gotv = vec0.SqrtMasked(vec1.AsMask32x4()) default: t.Errorf("Unknown method: Float32x4.%s", which) @@ -306,20 +306,20 @@ func testFloat32x8BinaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []in vec1 := simd.LoadFloat32x8Slice(v1) vec2 := simd.LoadInt32x8Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x8()) - case "MaskedDiv": - gotv = vec0.MaskedDiv(vec1, vec2.AsMask32x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x8()) - case "MaskedMul": - gotv = vec0.MaskedMul(vec1, vec2.AsMask32x8()) - case "MaskedMulByPowOf2": - gotv = vec0.MaskedMulByPowOf2(vec1, vec2.AsMask32x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x8()) + case "DivMasked": + gotv = vec0.DivMasked(vec1, vec2.AsMask32x8()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x8()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x8()) + case "MulByPowOf2Masked": + gotv = vec0.MulByPowOf2Masked(vec1, vec2.AsMask32x8()) + case "MulMasked": + gotv = vec0.MulMasked(vec1, vec2.AsMask32x8()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x8()) default: t.Errorf("Unknown method: Float32x8.%s", which) @@ -373,20 +373,20 @@ func testFloat32x8MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 []i vec1 := simd.LoadFloat32x8Slice(v1) vec2 := simd.LoadInt32x8Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedIsNan": - gotv = vec0.MaskedIsNan(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x8()).AsInt32x8() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "IsNanMasked": + gotv = vec0.IsNanMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, 
vec2.AsMask32x8()).AsInt32x8() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() default: t.Errorf("Unknown method: Float32x8.%s", which) @@ -434,12 +434,12 @@ func testFloat32x8TernaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []f vec2 := simd.LoadFloat32x8Slice(v2) vec3 := simd.LoadInt32x8Slice(v3) switch which { - case "MaskedFusedMultiplyAdd": - gotv = vec0.MaskedFusedMultiplyAdd(vec1, vec2, vec3.AsMask32x8()) - case "MaskedFusedMultiplyAddSub": - gotv = vec0.MaskedFusedMultiplyAddSub(vec1, vec2, vec3.AsMask32x8()) - case "MaskedFusedMultiplySubAdd": - gotv = vec0.MaskedFusedMultiplySubAdd(vec1, vec2, vec3.AsMask32x8()) + case "FusedMultiplyAddMasked": + gotv = vec0.FusedMultiplyAddMasked(vec1, vec2, vec3.AsMask32x8()) + case "FusedMultiplyAddSubMasked": + gotv = vec0.FusedMultiplyAddSubMasked(vec1, vec2, vec3.AsMask32x8()) + case "FusedMultiplySubAddMasked": + gotv = vec0.FusedMultiplySubAddMasked(vec1, vec2, vec3.AsMask32x8()) default: t.Errorf("Unknown method: Float32x8.%s", which) @@ -491,12 +491,12 @@ func testFloat32x8UnaryMasked(t *testing.T, v0 []float32, v1 []int32, want []flo vec0 := simd.LoadFloat32x8Slice(v0) vec1 := simd.LoadInt32x8Slice(v1) switch which { - case "MaskedApproximateReciprocal": - gotv = vec0.MaskedApproximateReciprocal(vec1.AsMask32x8()) - case "MaskedApproximateReciprocalOfSqrt": - gotv = vec0.MaskedApproximateReciprocalOfSqrt(vec1.AsMask32x8()) - case "MaskedSqrt": - gotv = vec0.MaskedSqrt(vec1.AsMask32x8()) + case "ApproximateReciprocalMasked": + gotv = vec0.ApproximateReciprocalMasked(vec1.AsMask32x8()) + case "ApproximateReciprocalOfSqrtMasked": + gotv = vec0.ApproximateReciprocalOfSqrtMasked(vec1.AsMask32x8()) + case "SqrtMasked": + gotv = vec0.SqrtMasked(vec1.AsMask32x8()) default: t.Errorf("Unknown method: Float32x8.%s", which) @@ -550,20 +550,20 @@ func testFloat32x16BinaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 []i vec1 := simd.LoadFloat32x16Slice(v1) vec2 := simd.LoadInt32x16Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16()) - case "MaskedDiv": - gotv = vec0.MaskedDiv(vec1, vec2.AsMask32x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) - case "MaskedMul": - gotv = vec0.MaskedMul(vec1, vec2.AsMask32x16()) - case "MaskedMulByPowOf2": - gotv = vec0.MaskedMulByPowOf2(vec1, vec2.AsMask32x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x16()) + case "DivMasked": + gotv = vec0.DivMasked(vec1, vec2.AsMask32x16()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x16()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x16()) + case "MulByPowOf2Masked": + gotv = vec0.MulByPowOf2Masked(vec1, vec2.AsMask32x16()) + case "MulMasked": + gotv = vec0.MulMasked(vec1, vec2.AsMask32x16()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x16()) default: t.Errorf("Unknown method: Float32x16.%s", which) @@ -617,20 +617,20 @@ func testFloat32x16MaskedCompare(t *testing.T, v0 []float32, v1 []float32, v2 [] vec1 := simd.LoadFloat32x16Slice(v1) vec2 := simd.LoadInt32x16Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, 
vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedIsNan": - gotv = vec0.MaskedIsNan(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "IsNanMasked": + gotv = vec0.IsNanMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() default: t.Errorf("Unknown method: Float32x16.%s", which) @@ -678,12 +678,12 @@ func testFloat32x16TernaryMasked(t *testing.T, v0 []float32, v1 []float32, v2 [] vec2 := simd.LoadFloat32x16Slice(v2) vec3 := simd.LoadInt32x16Slice(v3) switch which { - case "MaskedFusedMultiplyAdd": - gotv = vec0.MaskedFusedMultiplyAdd(vec1, vec2, vec3.AsMask32x16()) - case "MaskedFusedMultiplyAddSub": - gotv = vec0.MaskedFusedMultiplyAddSub(vec1, vec2, vec3.AsMask32x16()) - case "MaskedFusedMultiplySubAdd": - gotv = vec0.MaskedFusedMultiplySubAdd(vec1, vec2, vec3.AsMask32x16()) + case "FusedMultiplyAddMasked": + gotv = vec0.FusedMultiplyAddMasked(vec1, vec2, vec3.AsMask32x16()) + case "FusedMultiplyAddSubMasked": + gotv = vec0.FusedMultiplyAddSubMasked(vec1, vec2, vec3.AsMask32x16()) + case "FusedMultiplySubAddMasked": + gotv = vec0.FusedMultiplySubAddMasked(vec1, vec2, vec3.AsMask32x16()) default: t.Errorf("Unknown method: Float32x16.%s", which) @@ -727,12 +727,12 @@ func testFloat32x16UnaryMasked(t *testing.T, v0 []float32, v1 []int32, want []fl vec0 := simd.LoadFloat32x16Slice(v0) vec1 := simd.LoadInt32x16Slice(v1) switch which { - case "MaskedApproximateReciprocal": - gotv = vec0.MaskedApproximateReciprocal(vec1.AsMask32x16()) - case "MaskedApproximateReciprocalOfSqrt": - gotv = vec0.MaskedApproximateReciprocalOfSqrt(vec1.AsMask32x16()) - case "MaskedSqrt": - gotv = vec0.MaskedSqrt(vec1.AsMask32x16()) + case "ApproximateReciprocalMasked": + gotv = vec0.ApproximateReciprocalMasked(vec1.AsMask32x16()) + case "ApproximateReciprocalOfSqrtMasked": + gotv = vec0.ApproximateReciprocalOfSqrtMasked(vec1.AsMask32x16()) + case "SqrtMasked": + gotv = vec0.SqrtMasked(vec1.AsMask32x16()) default: t.Errorf("Unknown method: Float32x16.%s", which) @@ -794,20 +794,20 @@ func testFloat64x2BinaryMasked(t *testing.T, v0 []float64, v1 []float64, v2 []in vec1 := simd.LoadFloat64x2Slice(v1) vec2 := simd.LoadInt64x2Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x2()) - case "MaskedDiv": - gotv = vec0.MaskedDiv(vec1, vec2.AsMask64x2()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x2()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x2()) - case "MaskedMul": - gotv = vec0.MaskedMul(vec1, vec2.AsMask64x2()) - case "MaskedMulByPowOf2": - gotv = vec0.MaskedMulByPowOf2(vec1, vec2.AsMask64x2()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, 
vec2.AsMask64x2()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x2()) + case "DivMasked": + gotv = vec0.DivMasked(vec1, vec2.AsMask64x2()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x2()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask64x2()) + case "MulByPowOf2Masked": + gotv = vec0.MulByPowOf2Masked(vec1, vec2.AsMask64x2()) + case "MulMasked": + gotv = vec0.MulMasked(vec1, vec2.AsMask64x2()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask64x2()) default: t.Errorf("Unknown method: Float64x2.%s", which) @@ -861,20 +861,20 @@ func testFloat64x2MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []i vec1 := simd.LoadFloat64x2Slice(v1) vec2 := simd.LoadInt64x2Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedIsNan": - gotv = vec0.MaskedIsNan(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x2()).AsInt64x2() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "IsNanMasked": + gotv = vec0.IsNanMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() default: t.Errorf("Unknown method: Float64x2.%s", which) @@ -922,12 +922,12 @@ func testFloat64x2TernaryMasked(t *testing.T, v0 []float64, v1 []float64, v2 []f vec2 := simd.LoadFloat64x2Slice(v2) vec3 := simd.LoadInt64x2Slice(v3) switch which { - case "MaskedFusedMultiplyAdd": - gotv = vec0.MaskedFusedMultiplyAdd(vec1, vec2, vec3.AsMask64x2()) - case "MaskedFusedMultiplyAddSub": - gotv = vec0.MaskedFusedMultiplyAddSub(vec1, vec2, vec3.AsMask64x2()) - case "MaskedFusedMultiplySubAdd": - gotv = vec0.MaskedFusedMultiplySubAdd(vec1, vec2, vec3.AsMask64x2()) + case "FusedMultiplyAddMasked": + gotv = vec0.FusedMultiplyAddMasked(vec1, vec2, vec3.AsMask64x2()) + case "FusedMultiplyAddSubMasked": + gotv = vec0.FusedMultiplyAddSubMasked(vec1, vec2, vec3.AsMask64x2()) + case "FusedMultiplySubAddMasked": + gotv = vec0.FusedMultiplySubAddMasked(vec1, vec2, vec3.AsMask64x2()) default: t.Errorf("Unknown method: Float64x2.%s", which) @@ -979,12 +979,12 @@ func testFloat64x2UnaryMasked(t *testing.T, v0 []float64, v1 []int64, want []flo vec0 := simd.LoadFloat64x2Slice(v0) vec1 := simd.LoadInt64x2Slice(v1) switch which { - case "MaskedApproximateReciprocal": - gotv = vec0.MaskedApproximateReciprocal(vec1.AsMask64x2()) - case "MaskedApproximateReciprocalOfSqrt": - gotv = vec0.MaskedApproximateReciprocalOfSqrt(vec1.AsMask64x2()) - case "MaskedSqrt": - gotv = vec0.MaskedSqrt(vec1.AsMask64x2()) + case "ApproximateReciprocalMasked": + gotv = vec0.ApproximateReciprocalMasked(vec1.AsMask64x2()) + case "ApproximateReciprocalOfSqrtMasked": + 
gotv = vec0.ApproximateReciprocalOfSqrtMasked(vec1.AsMask64x2()) + case "SqrtMasked": + gotv = vec0.SqrtMasked(vec1.AsMask64x2()) default: t.Errorf("Unknown method: Float64x2.%s", which) @@ -1044,20 +1044,20 @@ func testFloat64x4BinaryMasked(t *testing.T, v0 []float64, v1 []float64, v2 []in vec1 := simd.LoadFloat64x4Slice(v1) vec2 := simd.LoadInt64x4Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x4()) - case "MaskedDiv": - gotv = vec0.MaskedDiv(vec1, vec2.AsMask64x4()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x4()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x4()) - case "MaskedMul": - gotv = vec0.MaskedMul(vec1, vec2.AsMask64x4()) - case "MaskedMulByPowOf2": - gotv = vec0.MaskedMulByPowOf2(vec1, vec2.AsMask64x4()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x4()) + case "DivMasked": + gotv = vec0.DivMasked(vec1, vec2.AsMask64x4()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x4()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask64x4()) + case "MulByPowOf2Masked": + gotv = vec0.MulByPowOf2Masked(vec1, vec2.AsMask64x4()) + case "MulMasked": + gotv = vec0.MulMasked(vec1, vec2.AsMask64x4()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask64x4()) default: t.Errorf("Unknown method: Float64x4.%s", which) @@ -1111,20 +1111,20 @@ func testFloat64x4MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []i vec1 := simd.LoadFloat64x4Slice(v1) vec2 := simd.LoadInt64x4Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedIsNan": - gotv = vec0.MaskedIsNan(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x4()).AsInt64x4() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "IsNanMasked": + gotv = vec0.IsNanMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() default: t.Errorf("Unknown method: Float64x4.%s", which) @@ -1172,12 +1172,12 @@ func testFloat64x4TernaryMasked(t *testing.T, v0 []float64, v1 []float64, v2 []f vec2 := simd.LoadFloat64x4Slice(v2) vec3 := simd.LoadInt64x4Slice(v3) switch which { - case "MaskedFusedMultiplyAdd": - gotv = vec0.MaskedFusedMultiplyAdd(vec1, vec2, vec3.AsMask64x4()) - case "MaskedFusedMultiplyAddSub": - gotv = vec0.MaskedFusedMultiplyAddSub(vec1, vec2, vec3.AsMask64x4()) - case "MaskedFusedMultiplySubAdd": - gotv = vec0.MaskedFusedMultiplySubAdd(vec1, vec2, vec3.AsMask64x4()) + case "FusedMultiplyAddMasked": + gotv = vec0.FusedMultiplyAddMasked(vec1, vec2, vec3.AsMask64x4()) + case 
"FusedMultiplyAddSubMasked": + gotv = vec0.FusedMultiplyAddSubMasked(vec1, vec2, vec3.AsMask64x4()) + case "FusedMultiplySubAddMasked": + gotv = vec0.FusedMultiplySubAddMasked(vec1, vec2, vec3.AsMask64x4()) default: t.Errorf("Unknown method: Float64x4.%s", which) @@ -1229,12 +1229,12 @@ func testFloat64x4UnaryMasked(t *testing.T, v0 []float64, v1 []int64, want []flo vec0 := simd.LoadFloat64x4Slice(v0) vec1 := simd.LoadInt64x4Slice(v1) switch which { - case "MaskedApproximateReciprocal": - gotv = vec0.MaskedApproximateReciprocal(vec1.AsMask64x4()) - case "MaskedApproximateReciprocalOfSqrt": - gotv = vec0.MaskedApproximateReciprocalOfSqrt(vec1.AsMask64x4()) - case "MaskedSqrt": - gotv = vec0.MaskedSqrt(vec1.AsMask64x4()) + case "ApproximateReciprocalMasked": + gotv = vec0.ApproximateReciprocalMasked(vec1.AsMask64x4()) + case "ApproximateReciprocalOfSqrtMasked": + gotv = vec0.ApproximateReciprocalOfSqrtMasked(vec1.AsMask64x4()) + case "SqrtMasked": + gotv = vec0.SqrtMasked(vec1.AsMask64x4()) default: t.Errorf("Unknown method: Float64x4.%s", which) @@ -1288,20 +1288,20 @@ func testFloat64x8BinaryMasked(t *testing.T, v0 []float64, v1 []float64, v2 []in vec1 := simd.LoadFloat64x8Slice(v1) vec2 := simd.LoadInt64x8Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x8()) - case "MaskedDiv": - gotv = vec0.MaskedDiv(vec1, vec2.AsMask64x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x8()) - case "MaskedMul": - gotv = vec0.MaskedMul(vec1, vec2.AsMask64x8()) - case "MaskedMulByPowOf2": - gotv = vec0.MaskedMulByPowOf2(vec1, vec2.AsMask64x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x8()) + case "DivMasked": + gotv = vec0.DivMasked(vec1, vec2.AsMask64x8()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x8()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask64x8()) + case "MulByPowOf2Masked": + gotv = vec0.MulByPowOf2Masked(vec1, vec2.AsMask64x8()) + case "MulMasked": + gotv = vec0.MulMasked(vec1, vec2.AsMask64x8()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask64x8()) default: t.Errorf("Unknown method: Float64x8.%s", which) @@ -1355,20 +1355,20 @@ func testFloat64x8MaskedCompare(t *testing.T, v0 []float64, v1 []float64, v2 []i vec1 := simd.LoadFloat64x8Slice(v1) vec2 := simd.LoadInt64x8Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedIsNan": - gotv = vec0.MaskedIsNan(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x8()).AsInt64x8() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "IsNanMasked": + gotv = vec0.IsNanMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, 
vec2.AsMask64x8()).AsInt64x8() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() default: t.Errorf("Unknown method: Float64x8.%s", which) @@ -1416,12 +1416,12 @@ func testFloat64x8TernaryMasked(t *testing.T, v0 []float64, v1 []float64, v2 []f vec2 := simd.LoadFloat64x8Slice(v2) vec3 := simd.LoadInt64x8Slice(v3) switch which { - case "MaskedFusedMultiplyAdd": - gotv = vec0.MaskedFusedMultiplyAdd(vec1, vec2, vec3.AsMask64x8()) - case "MaskedFusedMultiplyAddSub": - gotv = vec0.MaskedFusedMultiplyAddSub(vec1, vec2, vec3.AsMask64x8()) - case "MaskedFusedMultiplySubAdd": - gotv = vec0.MaskedFusedMultiplySubAdd(vec1, vec2, vec3.AsMask64x8()) + case "FusedMultiplyAddMasked": + gotv = vec0.FusedMultiplyAddMasked(vec1, vec2, vec3.AsMask64x8()) + case "FusedMultiplyAddSubMasked": + gotv = vec0.FusedMultiplyAddSubMasked(vec1, vec2, vec3.AsMask64x8()) + case "FusedMultiplySubAddMasked": + gotv = vec0.FusedMultiplySubAddMasked(vec1, vec2, vec3.AsMask64x8()) default: t.Errorf("Unknown method: Float64x8.%s", which) @@ -1465,12 +1465,12 @@ func testFloat64x8UnaryMasked(t *testing.T, v0 []float64, v1 []int64, want []flo vec0 := simd.LoadFloat64x8Slice(v0) vec1 := simd.LoadInt64x8Slice(v1) switch which { - case "MaskedApproximateReciprocal": - gotv = vec0.MaskedApproximateReciprocal(vec1.AsMask64x8()) - case "MaskedApproximateReciprocalOfSqrt": - gotv = vec0.MaskedApproximateReciprocalOfSqrt(vec1.AsMask64x8()) - case "MaskedSqrt": - gotv = vec0.MaskedSqrt(vec1.AsMask64x8()) + case "ApproximateReciprocalMasked": + gotv = vec0.ApproximateReciprocalMasked(vec1.AsMask64x8()) + case "ApproximateReciprocalOfSqrtMasked": + gotv = vec0.ApproximateReciprocalOfSqrtMasked(vec1.AsMask64x8()) + case "SqrtMasked": + gotv = vec0.SqrtMasked(vec1.AsMask64x8()) default: t.Errorf("Unknown method: Float64x8.%s", which) @@ -1532,18 +1532,18 @@ func testInt8x16BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want vec1 := simd.LoadInt8x16Slice(v1) vec2 := simd.LoadInt8x16Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x16()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x16()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x16()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask8x16()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask8x16()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask8x16()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask8x16()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask8x16()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask8x16()) default: t.Errorf("Unknown method: Int8x16.%s", which) @@ -1595,18 +1595,18 @@ func testInt8x16MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, wan vec1 := simd.LoadInt8x16Slice(v1) vec2 := simd.LoadInt8x16Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case 
"MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x16()).AsInt8x16() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask8x16()).AsInt8x16() default: t.Errorf("Unknown method: Int8x16.%s", which) @@ -1648,10 +1648,10 @@ func testInt8x16UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, whi vec0 := simd.LoadInt8x16Slice(v0) vec1 := simd.LoadInt8x16Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask8x16()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x16()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask8x16()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask8x16()) default: t.Errorf("Unknown method: Int8x16.%s", which) @@ -1713,18 +1713,18 @@ func testInt8x32BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want vec1 := simd.LoadInt8x32Slice(v1) vec2 := simd.LoadInt8x32Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x32()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x32()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x32()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x32()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x32()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x32()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask8x32()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask8x32()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask8x32()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask8x32()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask8x32()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask8x32()) default: t.Errorf("Unknown method: Int8x32.%s", which) @@ -1776,18 +1776,18 @@ func testInt8x32MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, wan vec1 := simd.LoadInt8x32Slice(v1) vec2 := simd.LoadInt8x32Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x32()).AsInt8x32() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case 
"LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask8x32()).AsInt8x32() default: t.Errorf("Unknown method: Int8x32.%s", which) @@ -1829,10 +1829,10 @@ func testInt8x32UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, whi vec0 := simd.LoadInt8x32Slice(v0) vec1 := simd.LoadInt8x32Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask8x32()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x32()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask8x32()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask8x32()) default: t.Errorf("Unknown method: Int8x32.%s", which) @@ -1884,18 +1884,18 @@ func testInt8x64BinaryMasked(t *testing.T, v0 []int8, v1 []int8, v2 []int8, want vec1 := simd.LoadInt8x64Slice(v1) vec2 := simd.LoadInt8x64Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x64()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x64()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x64()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x64()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x64()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x64()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask8x64()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask8x64()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask8x64()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask8x64()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask8x64()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask8x64()) default: t.Errorf("Unknown method: Int8x64.%s", which) @@ -1947,18 +1947,18 @@ func testInt8x64MaskedCompare(t *testing.T, v0 []int8, v1 []int8, v2 []int8, wan vec1 := simd.LoadInt8x64Slice(v1) vec2 := simd.LoadInt8x64Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x64()).AsInt8x64() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask8x64()).AsInt8x64() default: t.Errorf("Unknown method: Int8x64.%s", which) @@ -2000,10 +2000,10 @@ func testInt8x64UnaryMasked(t *testing.T, v0 []int8, v1 []int8, want []int8, whi vec0 := simd.LoadInt8x64Slice(v0) vec1 := simd.LoadInt8x64Slice(v1) switch which { - case "MaskedAbsolute": - gotv = 
vec0.MaskedAbsolute(vec1.AsMask8x64()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x64()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask8x64()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask8x64()) default: t.Errorf("Unknown method: Int8x64.%s", which) @@ -2083,28 +2083,28 @@ func testInt16x8BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, w vec1 := simd.LoadInt16x8Slice(v1) vec2 := simd.LoadInt16x8Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask16x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x8()) - case "MaskedMulHigh": - gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x8()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask16x8()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x8()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x8()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask16x8()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask16x8()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask16x8()) + case "MulHighMasked": + gotv = vec0.MulHighMasked(vec1, vec2.AsMask16x8()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask16x8()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask16x8()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask16x8()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x8()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x8()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x8()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask16x8()) default: t.Errorf("Unknown method: Int16x8.%s", which) @@ -2125,8 +2125,8 @@ func testInt16x8BinaryMaskedWiden(t *testing.T, v0 []int16, v1 []int16, v2 []int vec1 := simd.LoadInt16x8Slice(v1) vec2 := simd.LoadInt16x8Slice(v2) switch which { - case "MaskedPairDotProd": - gotv = vec0.MaskedPairDotProd(vec1, vec2.AsMask16x8()) + case "PairDotProdMasked": + gotv = vec0.PairDotProdMasked(vec1, vec2.AsMask16x8()) default: t.Errorf("Unknown method: Int16x8.%s", which) @@ -2199,18 +2199,18 @@ func testInt16x8MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, vec1 := simd.LoadInt16x8Slice(v1) vec2 := simd.LoadInt16x8Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x8()).AsInt16x8() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "GreaterEqualMasked": 
+ gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask16x8()).AsInt16x8() default: t.Errorf("Unknown method: Int16x8.%s", which) @@ -2256,10 +2256,10 @@ func testInt16x8TernaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, vec2 := simd.LoadInt16x8Slice(v2) vec3 := simd.LoadInt16x8Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x8()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x8()) default: t.Errorf("Unknown method: Int16x8.%s", which) @@ -2301,10 +2301,10 @@ func testInt16x8UnaryMasked(t *testing.T, v0 []int16, v1 []int16, want []int16, vec0 := simd.LoadInt16x8Slice(v0) vec1 := simd.LoadInt16x8Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask16x8()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x8()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask16x8()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask16x8()) default: t.Errorf("Unknown method: Int16x8.%s", which) @@ -2384,28 +2384,28 @@ func testInt16x16BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, vec1 := simd.LoadInt16x16Slice(v1) vec2 := simd.LoadInt16x16Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask16x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x16()) - case "MaskedMulHigh": - gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x16()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask16x16()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x16()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x16()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x16()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x16()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x16()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask16x16()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask16x16()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask16x16()) + case "MulHighMasked": + gotv = vec0.MulHighMasked(vec1, vec2.AsMask16x16()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask16x16()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask16x16()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask16x16()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x16()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x16()) + case "ShiftRightSignExtendedMasked": 
+ gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x16()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask16x16()) default: t.Errorf("Unknown method: Int16x16.%s", which) @@ -2426,8 +2426,8 @@ func testInt16x16BinaryMaskedWiden(t *testing.T, v0 []int16, v1 []int16, v2 []in vec1 := simd.LoadInt16x16Slice(v1) vec2 := simd.LoadInt16x16Slice(v2) switch which { - case "MaskedPairDotProd": - gotv = vec0.MaskedPairDotProd(vec1, vec2.AsMask16x16()) + case "PairDotProdMasked": + gotv = vec0.PairDotProdMasked(vec1, vec2.AsMask16x16()) default: t.Errorf("Unknown method: Int16x16.%s", which) @@ -2500,18 +2500,18 @@ func testInt16x16MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, vec1 := simd.LoadInt16x16Slice(v1) vec2 := simd.LoadInt16x16Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x16()).AsInt16x16() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask16x16()).AsInt16x16() default: t.Errorf("Unknown method: Int16x16.%s", which) @@ -2557,10 +2557,10 @@ func testInt16x16TernaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, vec2 := simd.LoadInt16x16Slice(v2) vec3 := simd.LoadInt16x16Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x16()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x16()) default: t.Errorf("Unknown method: Int16x16.%s", which) @@ -2602,10 +2602,10 @@ func testInt16x16UnaryMasked(t *testing.T, v0 []int16, v1 []int16, want []int16, vec0 := simd.LoadInt16x16Slice(v0) vec1 := simd.LoadInt16x16Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask16x16()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x16()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask16x16()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask16x16()) default: t.Errorf("Unknown method: Int16x16.%s", which) @@ -2667,28 +2667,28 @@ func testInt16x32BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, vec1 := simd.LoadInt16x32Slice(v1) vec2 := simd.LoadInt16x32Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x32()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, 
vec2.AsMask16x32()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x32()) - case "MaskedMulHigh": - gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x32()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask16x32()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x32()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x32()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x32()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x32()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x32()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x32()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask16x32()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask16x32()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask16x32()) + case "MulHighMasked": + gotv = vec0.MulHighMasked(vec1, vec2.AsMask16x32()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask16x32()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask16x32()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask16x32()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x32()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x32()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x32()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask16x32()) default: t.Errorf("Unknown method: Int16x32.%s", which) @@ -2709,8 +2709,8 @@ func testInt16x32BinaryMaskedWiden(t *testing.T, v0 []int16, v1 []int16, v2 []in vec1 := simd.LoadInt16x32Slice(v1) vec2 := simd.LoadInt16x32Slice(v2) switch which { - case "MaskedPairDotProd": - gotv = vec0.MaskedPairDotProd(vec1, vec2.AsMask16x32()) + case "PairDotProdMasked": + gotv = vec0.PairDotProdMasked(vec1, vec2.AsMask16x32()) default: t.Errorf("Unknown method: Int16x32.%s", which) @@ -2783,18 +2783,18 @@ func testInt16x32MaskedCompare(t *testing.T, v0 []int16, v1 []int16, v2 []int16, vec1 := simd.LoadInt16x32Slice(v1) vec2 := simd.LoadInt16x32Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x32()).AsInt16x32() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask16x32()).AsInt16x32() default: t.Errorf("Unknown method: Int16x32.%s", which) @@ -2840,10 +2840,10 @@ func testInt16x32TernaryMasked(t *testing.T, v0 []int16, v1 
[]int16, v2 []int16, vec2 := simd.LoadInt16x32Slice(v2) vec3 := simd.LoadInt16x32Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x32()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x32()) default: t.Errorf("Unknown method: Int16x32.%s", which) @@ -2885,10 +2885,10 @@ func testInt16x32UnaryMasked(t *testing.T, v0 []int16, v1 []int16, want []int16, vec0 := simd.LoadInt16x32Slice(v0) vec1 := simd.LoadInt16x32Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask16x32()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x32()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask16x32()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask16x32()) default: t.Errorf("Unknown method: Int16x32.%s", which) @@ -2962,34 +2962,34 @@ func testInt32x4BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w vec1 := simd.LoadInt32x4Slice(v1) vec2 := simd.LoadInt32x4Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x4()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x4()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x4()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x4()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x4()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x4()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x4()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x4()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x4()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x4()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x4()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x4()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x4()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x4()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask32x4()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask32x4()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x4()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x4()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask32x4()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask32x4()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask32x4()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask32x4()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x4()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x4()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x4()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x4()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask32x4()) default: t.Errorf("Unknown method: 
Int32x4.%s", which) @@ -3087,10 +3087,10 @@ func testInt32x4Int16x8Int16x8Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []int vec2 := simd.LoadInt16x8Slice(v2) vec3 := simd.LoadInt32x4Slice(v3) switch which { - case "MaskedPairDotProdAccumulate": - gotv = vec0.MaskedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) - case "MaskedSaturatedPairDotProdAccumulate": - gotv = vec0.MaskedSaturatedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) + case "PairDotProdAccumulateMasked": + gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4()) + case "SaturatedPairDotProdAccumulateMasked": + gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4()) default: t.Errorf("Unknown method: Int32x4.%s", which) @@ -3111,18 +3111,18 @@ func testInt32x4MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, vec1 := simd.LoadInt32x4Slice(v1) vec2 := simd.LoadInt32x4Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x4()).AsInt32x4() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() default: t.Errorf("Unknown method: Int32x4.%s", which) @@ -3168,10 +3168,10 @@ func testInt32x4TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, vec2 := simd.LoadInt32x4Slice(v2) vec3 := simd.LoadInt32x4Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x4()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x4()) default: t.Errorf("Unknown method: Int32x4.%s", which) @@ -3217,10 +3217,10 @@ func testInt32x4Uint8x16Int8x16Mask32x4Int32x4(t *testing.T, v0 []int32, v1 []ui vec2 := simd.LoadInt8x16Slice(v2) vec3 := simd.LoadInt32x4Slice(v3) switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) + case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4()) + case "UnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, 
vec3.AsMask32x4()) default: t.Errorf("Unknown method: Int32x4.%s", which) @@ -3262,10 +3262,10 @@ func testInt32x4UnaryMasked(t *testing.T, v0 []int32, v1 []int32, want []int32, vec0 := simd.LoadInt32x4Slice(v0) vec1 := simd.LoadInt32x4Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask32x4()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x4()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask32x4()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask32x4()) default: t.Errorf("Unknown method: Int32x4.%s", which) @@ -3339,34 +3339,34 @@ func testInt32x8BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w vec1 := simd.LoadInt32x8Slice(v1) vec2 := simd.LoadInt32x8Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x8()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x8()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x8()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x8()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x8()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x8()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x8()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x8()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x8()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x8()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x8()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask32x8()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask32x8()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x8()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x8()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask32x8()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask32x8()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask32x8()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask32x8()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x8()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x8()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x8()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x8()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask32x8()) default: t.Errorf("Unknown method: Int32x8.%s", which) @@ -3464,10 +3464,10 @@ func testInt32x8Int16x16Int16x16Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []i vec2 := simd.LoadInt16x16Slice(v2) vec3 := simd.LoadInt32x8Slice(v3) switch which { - case "MaskedPairDotProdAccumulate": - gotv = vec0.MaskedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x8()) - case "MaskedSaturatedPairDotProdAccumulate": - gotv = vec0.MaskedSaturatedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x8()) + case "PairDotProdAccumulateMasked": + gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8()) + case 
"SaturatedPairDotProdAccumulateMasked": + gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8()) default: t.Errorf("Unknown method: Int32x8.%s", which) @@ -3488,18 +3488,18 @@ func testInt32x8MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, vec1 := simd.LoadInt32x8Slice(v1) vec2 := simd.LoadInt32x8Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x8()).AsInt32x8() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() default: t.Errorf("Unknown method: Int32x8.%s", which) @@ -3545,10 +3545,10 @@ func testInt32x8TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, vec2 := simd.LoadInt32x8Slice(v2) vec3 := simd.LoadInt32x8Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x8()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x8()) default: t.Errorf("Unknown method: Int32x8.%s", which) @@ -3594,10 +3594,10 @@ func testInt32x8Uint8x32Int8x32Mask32x8Int32x8(t *testing.T, v0 []int32, v1 []ui vec2 := simd.LoadInt8x32Slice(v2) vec3 := simd.LoadInt32x8Slice(v3) switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x8()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x8()) + case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8()) + case "UnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8()) default: t.Errorf("Unknown method: Int32x8.%s", which) @@ -3639,10 +3639,10 @@ func testInt32x8UnaryMasked(t *testing.T, v0 []int32, v1 []int32, want []int32, vec0 := simd.LoadInt32x8Slice(v0) vec1 := simd.LoadInt32x8Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask32x8()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x8()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask32x8()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask32x8()) default: 
t.Errorf("Unknown method: Int32x8.%s", which) @@ -3710,34 +3710,34 @@ func testInt32x16BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, vec1 := simd.LoadInt32x16Slice(v1) vec2 := simd.LoadInt32x16Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x16()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask32x16()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x16()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x16()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x16()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x16()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x16()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x16()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask32x16()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask32x16()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x16()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x16()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask32x16()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask32x16()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask32x16()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask32x16()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x16()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x16()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x16()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x16()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask32x16()) default: t.Errorf("Unknown method: Int32x16.%s", which) @@ -3814,10 +3814,10 @@ func testInt32x16Int16x32Int16x32Mask32x16Int32x16(t *testing.T, v0 []int32, v1 vec2 := simd.LoadInt16x32Slice(v2) vec3 := simd.LoadInt32x16Slice(v3) switch which { - case "MaskedPairDotProdAccumulate": - gotv = vec0.MaskedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - case "MaskedSaturatedPairDotProdAccumulate": - gotv = vec0.MaskedSaturatedPairDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) + case "PairDotProdAccumulateMasked": + gotv = vec0.PairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16()) + case "SaturatedPairDotProdAccumulateMasked": + gotv = vec0.SaturatedPairDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16()) default: t.Errorf("Unknown method: Int32x16.%s", which) @@ -3838,18 +3838,18 @@ func testInt32x16MaskedCompare(t *testing.T, v0 []int32, v1 []int32, v2 []int32, vec1 := simd.LoadInt32x16Slice(v1) vec2 := simd.LoadInt32x16Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, 
vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() default: t.Errorf("Unknown method: Int32x16.%s", which) @@ -3895,10 +3895,10 @@ func testInt32x16TernaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, vec2 := simd.LoadInt32x16Slice(v2) vec3 := simd.LoadInt32x16Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x16()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x16()) default: t.Errorf("Unknown method: Int32x16.%s", which) @@ -3944,10 +3944,10 @@ func testInt32x16Uint8x64Int8x64Mask32x16Int32x16(t *testing.T, v0 []int32, v1 [ vec2 := simd.LoadInt8x64Slice(v2) vec3 := simd.LoadInt32x16Slice(v3) switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) + case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16()) + case "UnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16()) default: t.Errorf("Unknown method: Int32x16.%s", which) @@ -3989,10 +3989,10 @@ func testInt32x16UnaryMasked(t *testing.T, v0 []int32, v1 []int32, want []int32, vec0 := simd.LoadInt32x16Slice(v0) vec1 := simd.LoadInt32x16Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask32x16()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x16()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask32x16()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask32x16()) default: t.Errorf("Unknown method: Int32x16.%s", which) @@ -4062,36 +4062,36 @@ func testInt64x2BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w vec1 := simd.LoadInt64x2Slice(v1) vec2 := simd.LoadInt64x2Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x2()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x2()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x2()) - case "MaskedMax": - 
gotv = vec0.MaskedMax(vec1, vec2.AsMask64x2()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x2()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x2()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x2()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x2()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x2()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x2()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x2()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x2()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x2()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x2()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x2()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x2()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask64x2()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask64x2()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x2()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask64x2()) + case "MulEvenWidenMasked": + gotv = vec0.MulEvenWidenMasked(vec1, vec2.AsMask64x2()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask64x2()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask64x2()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask64x2()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask64x2()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x2()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x2()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x2()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask64x2()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask64x2()) default: t.Errorf("Unknown method: Int64x2.%s", which) @@ -4143,18 +4143,18 @@ func testInt64x2MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, vec1 := simd.LoadInt64x2Slice(v1) vec2 := simd.LoadInt64x2Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x2()).AsInt64x2() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() default: t.Errorf("Unknown method: Int64x2.%s", which) @@ -4200,10 +4200,10 @@ func testInt64x2TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 
[]int64, vec2 := simd.LoadInt64x2Slice(v2) vec3 := simd.LoadInt64x2Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x2()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x2()) default: t.Errorf("Unknown method: Int64x2.%s", which) @@ -4245,10 +4245,10 @@ func testInt64x2UnaryMasked(t *testing.T, v0 []int64, v1 []int64, want []int64, vec0 := simd.LoadInt64x2Slice(v0) vec1 := simd.LoadInt64x2Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask64x2()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask64x2()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask64x2()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask64x2()) default: t.Errorf("Unknown method: Int64x2.%s", which) @@ -4318,36 +4318,36 @@ func testInt64x4BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w vec1 := simd.LoadInt64x4Slice(v1) vec2 := simd.LoadInt64x4Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x4()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x4()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x4()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x4()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x4()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x4()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x4()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x4()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x4()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x4()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x4()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x4()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x4()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x4()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x4()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask64x4()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask64x4()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x4()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask64x4()) + case "MulEvenWidenMasked": + gotv = vec0.MulEvenWidenMasked(vec1, vec2.AsMask64x4()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask64x4()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask64x4()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask64x4()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask64x4()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x4()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x4()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x4()) + case "SubMasked": + gotv = 
vec0.SubMasked(vec1, vec2.AsMask64x4()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask64x4()) default: t.Errorf("Unknown method: Int64x4.%s", which) @@ -4399,18 +4399,18 @@ func testInt64x4MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, vec1 := simd.LoadInt64x4Slice(v1) vec2 := simd.LoadInt64x4Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x4()).AsInt64x4() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() default: t.Errorf("Unknown method: Int64x4.%s", which) @@ -4456,10 +4456,10 @@ func testInt64x4TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, vec2 := simd.LoadInt64x4Slice(v2) vec3 := simd.LoadInt64x4Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x4()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x4()) default: t.Errorf("Unknown method: Int64x4.%s", which) @@ -4501,10 +4501,10 @@ func testInt64x4UnaryMasked(t *testing.T, v0 []int64, v1 []int64, want []int64, vec0 := simd.LoadInt64x4Slice(v0) vec1 := simd.LoadInt64x4Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask64x4()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask64x4()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask64x4()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask64x4()) default: t.Errorf("Unknown method: Int64x4.%s", which) @@ -4574,36 +4574,36 @@ func testInt64x8BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w vec1 := simd.LoadInt64x8Slice(v1) vec2 := simd.LoadInt64x8Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x8()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x8()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x8()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x8()) - case "MaskedMulLow": - gotv = vec0.MaskedMulLow(vec1, vec2.AsMask64x8()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x8()) - case "MaskedRotateLeft": - gotv = 
vec0.MaskedRotateLeft(vec1, vec2.AsMask64x8()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x8()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x8()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x8()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x8()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x8()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask64x8()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask64x8()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x8()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask64x8()) + case "MulEvenWidenMasked": + gotv = vec0.MulEvenWidenMasked(vec1, vec2.AsMask64x8()) + case "MulLowMasked": + gotv = vec0.MulLowMasked(vec1, vec2.AsMask64x8()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask64x8()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask64x8()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask64x8()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x8()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x8()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x8()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask64x8()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask64x8()) default: t.Errorf("Unknown method: Int64x8.%s", which) @@ -4655,18 +4655,18 @@ func testInt64x8MaskedCompare(t *testing.T, v0 []int64, v1 []int64, v2 []int64, vec1 := simd.LoadInt64x8Slice(v1) vec2 := simd.LoadInt64x8Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x8()).AsInt64x8() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() default: t.Errorf("Unknown method: Int64x8.%s", which) @@ -4712,10 +4712,10 @@ func testInt64x8TernaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, vec2 := simd.LoadInt64x8Slice(v2) vec3 := simd.LoadInt64x8Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = 
vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x8()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x8()) default: t.Errorf("Unknown method: Int64x8.%s", which) @@ -4757,10 +4757,10 @@ func testInt64x8UnaryMasked(t *testing.T, v0 []int64, v1 []int64, want []int64, vec0 := simd.LoadInt64x8Slice(v0) vec1 := simd.LoadInt64x8Slice(v1) switch which { - case "MaskedAbsolute": - gotv = vec0.MaskedAbsolute(vec1.AsMask64x8()) - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask64x8()) + case "AbsoluteMasked": + gotv = vec0.AbsoluteMasked(vec1.AsMask64x8()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask64x8()) default: t.Errorf("Unknown method: Int64x8.%s", which) @@ -4824,22 +4824,22 @@ func testUint8x16BinaryMasked(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, w vec1 := simd.LoadUint8x16Slice(v1) vec2 := simd.LoadInt8x16Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x16()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask8x16()) - case "MaskedGaloisFieldMul": - gotv = vec0.MaskedGaloisFieldMul(vec1, vec2.AsMask8x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x16()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x16()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x16()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask8x16()) + case "AverageMasked": + gotv = vec0.AverageMasked(vec1, vec2.AsMask8x16()) + case "GaloisFieldMulMasked": + gotv = vec0.GaloisFieldMulMasked(vec1, vec2.AsMask8x16()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask8x16()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask8x16()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask8x16()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask8x16()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask8x16()) default: t.Errorf("Unknown method: Uint8x16.%s", which) @@ -4912,8 +4912,8 @@ func testUint8x16Int8x16Mask16x8Int16x8(t *testing.T, v0 []uint8, v1 []int8, v2 vec1 := simd.LoadInt8x16Slice(v1) vec2 := simd.LoadInt16x8Slice(v2) switch which { - case "MaskedSaturatedUnsignedSignedPairDotProd": - gotv = vec0.MaskedSaturatedUnsignedSignedPairDotProd(vec1, vec2.AsMask16x8()) + case "SaturatedUnsignedSignedPairDotProdMasked": + gotv = vec0.SaturatedUnsignedSignedPairDotProdMasked(vec1, vec2.AsMask16x8()) default: t.Errorf("Unknown method: Uint8x16.%s", which) @@ -4934,18 +4934,18 @@ func testUint8x16MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, vec1 := simd.LoadUint8x16Slice(v1) vec2 := simd.LoadInt8x16Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x16()).AsInt8x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x16()).AsInt8x16() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, 
vec2.AsMask8x16()).AsInt8x16() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask8x16()).AsInt8x16() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask8x16()).AsInt8x16() default: t.Errorf("Unknown method: Uint8x16.%s", which) @@ -4985,8 +4985,8 @@ func testUint8x16UnaryMasked(t *testing.T, v0 []uint8, v1 []int8, want []uint8, vec0 := simd.LoadUint8x16Slice(v0) vec1 := simd.LoadInt8x16Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x16()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask8x16()) default: t.Errorf("Unknown method: Uint8x16.%s", which) @@ -5050,22 +5050,22 @@ func testUint8x32BinaryMasked(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, w vec1 := simd.LoadUint8x32Slice(v1) vec2 := simd.LoadInt8x32Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x32()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask8x32()) - case "MaskedGaloisFieldMul": - gotv = vec0.MaskedGaloisFieldMul(vec1, vec2.AsMask8x32()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x32()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x32()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x32()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x32()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x32()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask8x32()) + case "AverageMasked": + gotv = vec0.AverageMasked(vec1, vec2.AsMask8x32()) + case "GaloisFieldMulMasked": + gotv = vec0.GaloisFieldMulMasked(vec1, vec2.AsMask8x32()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask8x32()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask8x32()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask8x32()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask8x32()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask8x32()) default: t.Errorf("Unknown method: Uint8x32.%s", which) @@ -5138,8 +5138,8 @@ func testUint8x32Int8x32Mask16x16Int16x16(t *testing.T, v0 []uint8, v1 []int8, v vec1 := simd.LoadInt8x32Slice(v1) vec2 := simd.LoadInt16x16Slice(v2) switch which { - case "MaskedSaturatedUnsignedSignedPairDotProd": - gotv = vec0.MaskedSaturatedUnsignedSignedPairDotProd(vec1, vec2.AsMask16x16()) + case "SaturatedUnsignedSignedPairDotProdMasked": + gotv = vec0.SaturatedUnsignedSignedPairDotProdMasked(vec1, vec2.AsMask16x16()) default: t.Errorf("Unknown method: Uint8x32.%s", which) @@ -5160,18 +5160,18 @@ func testUint8x32MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, vec1 := simd.LoadUint8x32Slice(v1) vec2 := simd.LoadInt8x32Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask8x32()).AsInt8x32() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x32()).AsInt8x32() 
- case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x32()).AsInt8x32() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask8x32()).AsInt8x32() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask8x32()).AsInt8x32() default: t.Errorf("Unknown method: Uint8x32.%s", which) @@ -5211,8 +5211,8 @@ func testUint8x32UnaryMasked(t *testing.T, v0 []uint8, v1 []int8, want []uint8, vec0 := simd.LoadUint8x32Slice(v0) vec1 := simd.LoadInt8x32Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x32()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask8x32()) default: t.Errorf("Unknown method: Uint8x32.%s", which) @@ -5268,22 +5268,22 @@ func testUint8x64BinaryMasked(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, w vec1 := simd.LoadUint8x64Slice(v1) vec2 := simd.LoadInt8x64Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask8x64()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask8x64()) - case "MaskedGaloisFieldMul": - gotv = vec0.MaskedGaloisFieldMul(vec1, vec2.AsMask8x64()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask8x64()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask8x64()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask8x64()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask8x64()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask8x64()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask8x64()) + case "AverageMasked": + gotv = vec0.AverageMasked(vec1, vec2.AsMask8x64()) + case "GaloisFieldMulMasked": + gotv = vec0.GaloisFieldMulMasked(vec1, vec2.AsMask8x64()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask8x64()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask8x64()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask8x64()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask8x64()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask8x64()) default: t.Errorf("Unknown method: Uint8x64.%s", which) @@ -5356,8 +5356,8 @@ func testUint8x64Int8x64Mask16x32Int16x32(t *testing.T, v0 []uint8, v1 []int8, v vec1 := simd.LoadInt8x64Slice(v1) vec2 := simd.LoadInt16x32Slice(v2) switch which { - case "MaskedSaturatedUnsignedSignedPairDotProd": - gotv = vec0.MaskedSaturatedUnsignedSignedPairDotProd(vec1, vec2.AsMask16x32()) + case "SaturatedUnsignedSignedPairDotProdMasked": + gotv = vec0.SaturatedUnsignedSignedPairDotProdMasked(vec1, vec2.AsMask16x32()) default: t.Errorf("Unknown method: Uint8x64.%s", which) @@ -5378,18 +5378,18 @@ func testUint8x64MaskedCompare(t *testing.T, v0 []uint8, v1 []uint8, v2 []int8, vec1 := simd.LoadUint8x64Slice(v1) vec2 := simd.LoadInt8x64Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedLess": - gotv = 
vec0.MaskedLess(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask8x64()).AsInt8x64() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask8x64()).AsInt8x64() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask8x64()).AsInt8x64() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask8x64()).AsInt8x64() default: t.Errorf("Unknown method: Uint8x64.%s", which) @@ -5429,8 +5429,8 @@ func testUint8x64UnaryMasked(t *testing.T, v0 []uint8, v1 []int8, want []uint8, vec0 := simd.LoadUint8x64Slice(v0) vec1 := simd.LoadInt8x64Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask8x64()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask8x64()) default: t.Errorf("Unknown method: Uint8x64.%s", which) @@ -5504,28 +5504,28 @@ func testUint16x8BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16 vec1 := simd.LoadUint16x8Slice(v1) vec2 := simd.LoadInt16x8Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x8()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask16x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x8()) - case "MaskedMulHigh": - gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x8()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x8()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x8()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x8()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x8()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x8()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask16x8()) + case "AverageMasked": + gotv = vec0.AverageMasked(vec1, vec2.AsMask16x8()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask16x8()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask16x8()) + case "MulHighMasked": + gotv = vec0.MulHighMasked(vec1, vec2.AsMask16x8()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask16x8()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask16x8()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x8()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x8()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x8()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask16x8()) default: t.Errorf("Unknown method: Uint16x8.%s", which) @@ -5577,18 +5577,18 @@ func testUint16x8MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 vec1 := simd.LoadUint16x8Slice(v1) vec2 := simd.LoadInt16x8Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, 
vec2.AsMask16x8()).AsInt16x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x8()).AsInt16x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x8()).AsInt16x8() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask16x8()).AsInt16x8() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask16x8()).AsInt16x8() default: t.Errorf("Unknown method: Uint16x8.%s", which) @@ -5634,10 +5634,10 @@ func testUint16x8TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uint vec2 := simd.LoadUint16x8Slice(v2) vec3 := simd.LoadInt16x8Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x8()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x8()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x8()) default: t.Errorf("Unknown method: Uint16x8.%s", which) @@ -5677,8 +5677,8 @@ func testUint16x8UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint1 vec0 := simd.LoadUint16x8Slice(v0) vec1 := simd.LoadInt16x8Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x8()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask16x8()) default: t.Errorf("Unknown method: Uint16x8.%s", which) @@ -5752,28 +5752,28 @@ func testUint16x16BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 vec1 := simd.LoadUint16x16Slice(v1) vec2 := simd.LoadInt16x16Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x16()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask16x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x16()) - case "MaskedMulHigh": - gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x16()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x16()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x16()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x16()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x16()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x16()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask16x16()) + case "AverageMasked": + gotv = vec0.AverageMasked(vec1, vec2.AsMask16x16()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask16x16()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask16x16()) + case "MulHighMasked": + gotv = vec0.MulHighMasked(vec1, vec2.AsMask16x16()) + 
case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask16x16()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask16x16()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x16()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x16()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x16()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask16x16()) default: t.Errorf("Unknown method: Uint16x16.%s", which) @@ -5825,18 +5825,18 @@ func testUint16x16MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int vec1 := simd.LoadUint16x16Slice(v1) vec2 := simd.LoadInt16x16Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x16()).AsInt16x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x16()).AsInt16x16() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask16x16()).AsInt16x16() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask16x16()).AsInt16x16() default: t.Errorf("Unknown method: Uint16x16.%s", which) @@ -5882,10 +5882,10 @@ func testUint16x16TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uin vec2 := simd.LoadUint16x16Slice(v2) vec3 := simd.LoadInt16x16Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x16()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x16()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x16()) default: t.Errorf("Unknown method: Uint16x16.%s", which) @@ -5925,8 +5925,8 @@ func testUint16x16UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint vec0 := simd.LoadUint16x16Slice(v0) vec1 := simd.LoadInt16x16Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x16()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask16x16()) default: t.Errorf("Unknown method: Uint16x16.%s", which) @@ -5988,28 +5988,28 @@ func testUint16x32BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1 vec1 := simd.LoadUint16x32Slice(v1) vec2 := simd.LoadInt16x32Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask16x32()) - case "MaskedAverage": - gotv = vec0.MaskedAverage(vec1, vec2.AsMask16x32()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask16x32()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask16x32()) - case 
"MaskedMulHigh": - gotv = vec0.MaskedMulHigh(vec1, vec2.AsMask16x32()) - case "MaskedSaturatedAdd": - gotv = vec0.MaskedSaturatedAdd(vec1, vec2.AsMask16x32()) - case "MaskedSaturatedSub": - gotv = vec0.MaskedSaturatedSub(vec1, vec2.AsMask16x32()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask16x32()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask16x32()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask16x32()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask16x32()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask16x32()) + case "AverageMasked": + gotv = vec0.AverageMasked(vec1, vec2.AsMask16x32()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask16x32()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask16x32()) + case "MulHighMasked": + gotv = vec0.MulHighMasked(vec1, vec2.AsMask16x32()) + case "SaturatedAddMasked": + gotv = vec0.SaturatedAddMasked(vec1, vec2.AsMask16x32()) + case "SaturatedSubMasked": + gotv = vec0.SaturatedSubMasked(vec1, vec2.AsMask16x32()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x32()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x32()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x32()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask16x32()) default: t.Errorf("Unknown method: Uint16x32.%s", which) @@ -6061,18 +6061,18 @@ func testUint16x32MaskedCompare(t *testing.T, v0 []uint16, v1 []uint16, v2 []int vec1 := simd.LoadUint16x32Slice(v1) vec2 := simd.LoadInt16x32Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask16x32()).AsInt16x32() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask16x32()).AsInt16x32() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask16x32()).AsInt16x32() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask16x32()).AsInt16x32() default: t.Errorf("Unknown method: Uint16x32.%s", which) @@ -6118,10 +6118,10 @@ func testUint16x32TernaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []uin vec2 := simd.LoadUint16x32Slice(v2) vec3 := simd.LoadInt16x32Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask16x32()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask16x32()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, 
vec3.AsMask16x32()) default: t.Errorf("Unknown method: Uint16x32.%s", which) @@ -6161,8 +6161,8 @@ func testUint16x32UnaryMasked(t *testing.T, v0 []uint16, v1 []int16, want []uint vec0 := simd.LoadUint16x32Slice(v0) vec1 := simd.LoadInt16x32Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask16x32()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask16x32()) default: t.Errorf("Unknown method: Uint16x32.%s", which) @@ -6232,32 +6232,32 @@ func testUint32x4BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32 vec1 := simd.LoadUint32x4Slice(v1) vec2 := simd.LoadInt32x4Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x4()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x4()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x4()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x4()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x4()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x4()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x4()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x4()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x4()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x4()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x4()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x4()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x4()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x4()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask32x4()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask32x4()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x4()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x4()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask32x4()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask32x4()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask32x4()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x4()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x4()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x4()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x4()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask32x4()) default: t.Errorf("Unknown method: Uint32x4.%s", which) @@ -6330,18 +6330,18 @@ func testUint32x4MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 vec1 := simd.LoadUint32x4Slice(v1) vec2 := simd.LoadInt32x4Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x4()).AsInt32x4() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x4()).AsInt32x4() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "GreaterEqualMasked": + 
gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x4()).AsInt32x4() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x4()).AsInt32x4() default: t.Errorf("Unknown method: Uint32x4.%s", which) @@ -6387,10 +6387,10 @@ func testUint32x4TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint vec2 := simd.LoadUint32x4Slice(v2) vec3 := simd.LoadInt32x4Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x4()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x4()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x4()) default: t.Errorf("Unknown method: Uint32x4.%s", which) @@ -6412,10 +6412,10 @@ func testUint32x4Uint8x16Int8x16Mask32x4Uint32x4(t *testing.T, v0 []uint32, v1 [ vec2 := simd.LoadInt8x16Slice(v2) vec3 := simd.LoadInt32x4Slice(v3) switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x4()) + case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4()) + case "UnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x4()) default: t.Errorf("Unknown method: Uint32x4.%s", which) @@ -6479,8 +6479,8 @@ func testUint32x4UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint3 vec0 := simd.LoadUint32x4Slice(v0) vec1 := simd.LoadInt32x4Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x4()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask32x4()) default: t.Errorf("Unknown method: Uint32x4.%s", which) @@ -6550,32 +6550,32 @@ func testUint32x8BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32 vec1 := simd.LoadUint32x8Slice(v1) vec2 := simd.LoadInt32x8Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x8()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x8()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x8()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x8()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x8()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x8()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x8()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x8()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x8()) - case 
"MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x8()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x8()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask32x8()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask32x8()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x8()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x8()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask32x8()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask32x8()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask32x8()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x8()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x8()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x8()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x8()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask32x8()) default: t.Errorf("Unknown method: Uint32x8.%s", which) @@ -6648,18 +6648,18 @@ func testUint32x8MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 vec1 := simd.LoadUint32x8Slice(v1) vec2 := simd.LoadInt32x8Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x8()).AsInt32x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x8()).AsInt32x8() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x8()).AsInt32x8() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x8()).AsInt32x8() default: t.Errorf("Unknown method: Uint32x8.%s", which) @@ -6705,10 +6705,10 @@ func testUint32x8TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uint vec2 := simd.LoadUint32x8Slice(v2) vec3 := simd.LoadInt32x8Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x8()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x8()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x8()) default: t.Errorf("Unknown method: Uint32x8.%s", which) @@ -6730,10 +6730,10 @@ func testUint32x8Uint8x32Int8x32Mask32x8Uint32x8(t *testing.T, v0 []uint32, v1 [ vec2 := simd.LoadInt8x32Slice(v2) vec3 := simd.LoadInt32x8Slice(v3) switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x8()) - case 
"MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x8()) + case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8()) + case "UnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x8()) default: t.Errorf("Unknown method: Uint32x8.%s", which) @@ -6797,8 +6797,8 @@ func testUint32x8UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint3 vec0 := simd.LoadUint32x8Slice(v0) vec1 := simd.LoadInt32x8Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x8()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask32x8()) default: t.Errorf("Unknown method: Uint32x8.%s", which) @@ -6864,32 +6864,32 @@ func testUint32x16BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3 vec1 := simd.LoadUint32x16Slice(v1) vec2 := simd.LoadInt32x16Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask32x16()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask32x16()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask32x16()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask32x16()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask32x16()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask32x16()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask32x16()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask32x16()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask32x16()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask32x16()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask32x16()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask32x16()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask32x16()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask32x16()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask32x16()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask32x16()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask32x16()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask32x16()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask32x16()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask32x16()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask32x16()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x16()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x16()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x16()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask32x16()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask32x16()) default: t.Errorf("Unknown method: Uint32x16.%s", which) @@ -6941,18 +6941,18 @@ func testUint32x16MaskedCompare(t *testing.T, v0 []uint32, v1 []uint32, v2 []int vec1 := simd.LoadUint32x16Slice(v1) vec2 := simd.LoadInt32x16Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedGreaterEqual": - gotv = 
vec0.MaskedGreaterEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask32x16()).AsInt32x16() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask32x16()).AsInt32x16() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask32x16()).AsInt32x16() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask32x16()).AsInt32x16() default: t.Errorf("Unknown method: Uint32x16.%s", which) @@ -6998,10 +6998,10 @@ func testUint32x16TernaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []uin vec2 := simd.LoadUint32x16Slice(v2) vec3 := simd.LoadInt32x16Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask32x16()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x16()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask32x16()) default: t.Errorf("Unknown method: Uint32x16.%s", which) @@ -7023,10 +7023,10 @@ func testUint32x16Uint8x64Int8x64Mask32x16Uint32x16(t *testing.T, v0 []uint32, v vec2 := simd.LoadInt8x64Slice(v2) vec3 := simd.LoadInt32x16Slice(v3) switch which { - case "MaskedSaturatedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) - case "MaskedUnsignedSignedQuadDotProdAccumulate": - gotv = vec0.MaskedUnsignedSignedQuadDotProdAccumulate(vec1, vec2, vec3.AsMask32x16()) + case "SaturatedUnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.SaturatedUnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16()) + case "UnsignedSignedQuadDotProdAccumulateMasked": + gotv = vec0.UnsignedSignedQuadDotProdAccumulateMasked(vec1, vec2, vec3.AsMask32x16()) default: t.Errorf("Unknown method: Uint32x16.%s", which) @@ -7090,8 +7090,8 @@ func testUint32x16UnaryMasked(t *testing.T, v0 []uint32, v1 []int32, want []uint vec0 := simd.LoadUint32x16Slice(v0) vec1 := simd.LoadInt32x16Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask32x16()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask32x16()) default: t.Errorf("Unknown method: Uint32x16.%s", which) @@ -7159,34 +7159,34 @@ func testUint64x2BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 vec1 := simd.LoadUint64x2Slice(v1) vec2 := simd.LoadInt64x2Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x2()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x2()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x2()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x2()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x2()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x2()) - case 
"MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x2()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x2()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x2()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x2()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x2()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x2()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x2()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x2()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x2()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask64x2()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask64x2()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x2()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask64x2()) + case "MulEvenWidenMasked": + gotv = vec0.MulEvenWidenMasked(vec1, vec2.AsMask64x2()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask64x2()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask64x2()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask64x2()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x2()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x2()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x2()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask64x2()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask64x2()) default: t.Errorf("Unknown method: Uint64x2.%s", which) @@ -7238,18 +7238,18 @@ func testUint64x2MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 vec1 := simd.LoadUint64x2Slice(v1) vec2 := simd.LoadInt64x2Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x2()).AsInt64x2() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x2()).AsInt64x2() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x2()).AsInt64x2() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x2()).AsInt64x2() default: t.Errorf("Unknown method: Uint64x2.%s", which) @@ -7295,10 +7295,10 @@ func testUint64x2TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint vec2 := simd.LoadUint64x2Slice(v2) vec3 := simd.LoadInt64x2Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x2()) + case 
"ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x2()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x2()) default: t.Errorf("Unknown method: Uint64x2.%s", which) @@ -7338,8 +7338,8 @@ func testUint64x2UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6 vec0 := simd.LoadUint64x2Slice(v0) vec1 := simd.LoadInt64x2Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask64x2()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask64x2()) default: t.Errorf("Unknown method: Uint64x2.%s", which) @@ -7407,34 +7407,34 @@ func testUint64x4BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 vec1 := simd.LoadUint64x4Slice(v1) vec2 := simd.LoadInt64x4Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x4()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x4()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x4()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x4()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x4()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x4()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x4()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x4()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x4()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x4()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x4()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x4()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x4()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x4()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x4()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask64x4()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask64x4()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x4()) + case "MinMasked": + gotv = vec0.MinMasked(vec1, vec2.AsMask64x4()) + case "MulEvenWidenMasked": + gotv = vec0.MulEvenWidenMasked(vec1, vec2.AsMask64x4()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask64x4()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask64x4()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask64x4()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x4()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x4()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x4()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask64x4()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask64x4()) default: t.Errorf("Unknown method: Uint64x4.%s", which) @@ -7486,18 +7486,18 @@ func testUint64x4MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 vec1 := simd.LoadUint64x4Slice(v1) vec2 := simd.LoadInt64x4Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - 
case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x4()).AsInt64x4() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x4()).AsInt64x4() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x4()).AsInt64x4() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x4()).AsInt64x4() default: t.Errorf("Unknown method: Uint64x4.%s", which) @@ -7543,10 +7543,10 @@ func testUint64x4TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint vec2 := simd.LoadUint64x4Slice(v2) vec3 := simd.LoadInt64x4Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x4()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x4()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x4()) default: t.Errorf("Unknown method: Uint64x4.%s", which) @@ -7586,8 +7586,8 @@ func testUint64x4UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6 vec0 := simd.LoadUint64x4Slice(v0) vec1 := simd.LoadInt64x4Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask64x4()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask64x4()) default: t.Errorf("Unknown method: Uint64x4.%s", which) @@ -7655,34 +7655,34 @@ func testUint64x8BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64 vec1 := simd.LoadUint64x8Slice(v1) vec2 := simd.LoadInt64x8Slice(v2) switch which { - case "MaskedAdd": - gotv = vec0.MaskedAdd(vec1, vec2.AsMask64x8()) - case "MaskedAnd": - gotv = vec0.MaskedAnd(vec1, vec2.AsMask64x8()) - case "MaskedAndNot": - gotv = vec0.MaskedAndNot(vec1, vec2.AsMask64x8()) - case "MaskedMax": - gotv = vec0.MaskedMax(vec1, vec2.AsMask64x8()) - case "MaskedMin": - gotv = vec0.MaskedMin(vec1, vec2.AsMask64x8()) - case "MaskedMulEvenWiden": - gotv = vec0.MaskedMulEvenWiden(vec1, vec2.AsMask64x8()) - case "MaskedOr": - gotv = vec0.MaskedOr(vec1, vec2.AsMask64x8()) - case "MaskedRotateLeft": - gotv = vec0.MaskedRotateLeft(vec1, vec2.AsMask64x8()) - case "MaskedRotateRight": - gotv = vec0.MaskedRotateRight(vec1, vec2.AsMask64x8()) - case "MaskedShiftLeft": - gotv = vec0.MaskedShiftLeft(vec1, vec2.AsMask64x8()) - case "MaskedShiftRight": - gotv = vec0.MaskedShiftRight(vec1, vec2.AsMask64x8()) - case "MaskedShiftRightSignExtended": - gotv = vec0.MaskedShiftRightSignExtended(vec1, vec2.AsMask64x8()) - case "MaskedSub": - gotv = vec0.MaskedSub(vec1, vec2.AsMask64x8()) - case "MaskedXor": - gotv = vec0.MaskedXor(vec1, vec2.AsMask64x8()) + case "AddMasked": + gotv = vec0.AddMasked(vec1, vec2.AsMask64x8()) + case "AndMasked": + gotv = vec0.AndMasked(vec1, vec2.AsMask64x8()) + case "AndNotMasked": + gotv = vec0.AndNotMasked(vec1, vec2.AsMask64x8()) + case "MaxMasked": + gotv = vec0.MaxMasked(vec1, vec2.AsMask64x8()) + case "MinMasked": + gotv 
= vec0.MinMasked(vec1, vec2.AsMask64x8()) + case "MulEvenWidenMasked": + gotv = vec0.MulEvenWidenMasked(vec1, vec2.AsMask64x8()) + case "OrMasked": + gotv = vec0.OrMasked(vec1, vec2.AsMask64x8()) + case "RotateLeftMasked": + gotv = vec0.RotateLeftMasked(vec1, vec2.AsMask64x8()) + case "RotateRightMasked": + gotv = vec0.RotateRightMasked(vec1, vec2.AsMask64x8()) + case "ShiftLeftMasked": + gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x8()) + case "ShiftRightMasked": + gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x8()) + case "ShiftRightSignExtendedMasked": + gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x8()) + case "SubMasked": + gotv = vec0.SubMasked(vec1, vec2.AsMask64x8()) + case "XorMasked": + gotv = vec0.XorMasked(vec1, vec2.AsMask64x8()) default: t.Errorf("Unknown method: Uint64x8.%s", which) @@ -7734,18 +7734,18 @@ func testUint64x8MaskedCompare(t *testing.T, v0 []uint64, v1 []uint64, v2 []int6 vec1 := simd.LoadUint64x8Slice(v1) vec2 := simd.LoadInt64x8Slice(v2) switch which { - case "MaskedEqual": - gotv = vec0.MaskedEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedGreater": - gotv = vec0.MaskedGreater(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedGreaterEqual": - gotv = vec0.MaskedGreaterEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedLess": - gotv = vec0.MaskedLess(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedLessEqual": - gotv = vec0.MaskedLessEqual(vec1, vec2.AsMask64x8()).AsInt64x8() - case "MaskedNotEqual": - gotv = vec0.MaskedNotEqual(vec1, vec2.AsMask64x8()).AsInt64x8() + case "EqualMasked": + gotv = vec0.EqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "GreaterEqualMasked": + gotv = vec0.GreaterEqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "GreaterMasked": + gotv = vec0.GreaterMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "LessEqualMasked": + gotv = vec0.LessEqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "LessMasked": + gotv = vec0.LessMasked(vec1, vec2.AsMask64x8()).AsInt64x8() + case "NotEqualMasked": + gotv = vec0.NotEqualMasked(vec1, vec2.AsMask64x8()).AsInt64x8() default: t.Errorf("Unknown method: Uint64x8.%s", which) @@ -7791,10 +7791,10 @@ func testUint64x8TernaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []uint vec2 := simd.LoadUint64x8Slice(v2) vec3 := simd.LoadInt64x8Slice(v3) switch which { - case "MaskedShiftLeftAndFillUpperFrom": - gotv = vec0.MaskedShiftLeftAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) - case "MaskedShiftRightAndFillUpperFrom": - gotv = vec0.MaskedShiftRightAndFillUpperFrom(vec1, vec2, vec3.AsMask64x8()) + case "ShiftLeftAndFillUpperFromMasked": + gotv = vec0.ShiftLeftAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x8()) + case "ShiftRightAndFillUpperFromMasked": + gotv = vec0.ShiftRightAndFillUpperFromMasked(vec1, vec2, vec3.AsMask64x8()) default: t.Errorf("Unknown method: Uint64x8.%s", which) @@ -7834,8 +7834,8 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6 vec0 := simd.LoadUint64x8Slice(v0) vec1 := simd.LoadInt64x8Slice(v1) switch which { - case "MaskedPopCount": - gotv = vec0.MaskedPopCount(vec1.AsMask64x8()) + case "PopCountMasked": + gotv = vec0.PopCountMasked(vec1.AsMask64x8()) default: t.Errorf("Unknown method: Uint64x8.%s", which) @@ -7851,40 +7851,40 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6 /* The operations below cannot be tested via wrappers, please test them directly */ // CeilWithPrecision +// CeilWithPrecisionMasked // DiffWithCeilWithPrecision +// 
DiffWithCeilWithPrecisionMasked
 // DiffWithFloorWithPrecision
+// DiffWithFloorWithPrecisionMasked
 // DiffWithRoundWithPrecision
+// DiffWithRoundWithPrecisionMasked
 // DiffWithTruncWithPrecision
+// DiffWithTruncWithPrecisionMasked
 // FloorWithPrecision
+// FloorWithPrecisionMasked
 // GaloisFieldAffineTransform
 // GaloisFieldAffineTransformInversed
+// GaloisFieldAffineTransformInversedMasked
+// GaloisFieldAffineTransformMasked
 // Get128
 // GetElem
-// MaskedCeilWithPrecision
-// MaskedDiffWithCeilWithPrecision
-// MaskedDiffWithFloorWithPrecision
-// MaskedDiffWithRoundWithPrecision
-// MaskedDiffWithTruncWithPrecision
-// MaskedFloorWithPrecision
-// MaskedGaloisFieldAffineTransform
-// MaskedGaloisFieldAffineTransformInversed
-// MaskedRotateAllLeft
-// MaskedRotateAllRight
-// MaskedRoundWithPrecision
-// MaskedShiftAllLeft
-// MaskedShiftAllLeftAndFillUpperFrom
-// MaskedShiftAllRight
-// MaskedShiftAllRightAndFillUpperFrom
-// MaskedShiftAllRightSignExtended
-// MaskedTruncWithPrecision
 // RotateAllLeft
+// RotateAllLeftMasked
 // RotateAllRight
+// RotateAllRightMasked
 // RoundWithPrecision
+// RoundWithPrecisionMasked
 // Set128
 // SetElem
 // ShiftAllLeft
 // ShiftAllLeftAndFillUpperFrom
+// ShiftAllLeftAndFillUpperFromMasked
+// ShiftAllLeftMasked
 // ShiftAllRight
 // ShiftAllRightAndFillUpperFrom
+// ShiftAllRightAndFillUpperFromMasked
+// ShiftAllRightMasked
 // ShiftAllRightSignExtended
+// ShiftAllRightSignExtendedMasked
 // TruncWithPrecision
+// TruncWithPrecisionMasked
-- 
2.52.0
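
As a rough illustration of the renamed $(OP)Masked methods exercised by the wrapped tests above, the sketch below shows how one of them might be tested directly, outside the generated wrappers, in the spirit of the "please test them directly" note. It is not part of this CL. It assumes the experimental simd package builds with GOEXPERIMENT=simd on amd64, that an all-ones (-1) int64 lane converts to a true mask lane via AsMask64x4, and that a StoreSlice method mirrors the Load*Slice loaders used in the wrappers; only LoadUint64x4Slice, LoadInt64x4Slice, AsMask64x4, and AddMasked are taken from the diff itself.

// Sketch only; not part of this CL. Directly exercises the renamed
// AddMasked method on Uint64x4 without the generated wrappers.
// Assumptions (not confirmed by this patch): GOEXPERIMENT=simd on
// amd64, -1 int64 lanes load as true mask lanes, and a StoreSlice
// method mirrors LoadUint64x4Slice.
package simd_test

import (
	"simd"
	"testing"
)

func TestUint64x4AddMaskedDirect(t *testing.T) {
	a := []uint64{1, 2, 3, 4}
	b := []uint64{10, 20, 30, 40}
	m := []int64{-1, 0, -1, 0} // -1 selects a lane, 0 masks it off

	va := simd.LoadUint64x4Slice(a)
	vb := simd.LoadUint64x4Slice(b)
	mask := simd.LoadInt64x4Slice(m).AsMask64x4()

	got := make([]uint64, 4)
	va.AddMasked(vb, mask).StoreSlice(got) // StoreSlice is assumed

	// Only the selected lanes are asserted; whether masked-off lanes
	// are zeroed or merged is not specified by this sketch.
	for i := range got {
		if m[i] != 0 && got[i] != a[i]+b[i] {
			t.Errorf("lane %d: got %d, want %d", i, got[i], a[i]+b[i])
		}
	}
}

The same pattern could be adapted to cover the immediate-operand methods listed above (for example ShiftAllLeftMasked or RotateAllLeftMasked), whose exact signatures are not shown in this patch.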