From b69622b83e38b58a461938163fdef03683a2a871 Mon Sep 17 00:00:00 2001
From: Junyang Shao
Date: Fri, 11 Jul 2025 17:56:22 +0000
Subject: [PATCH] [dev.simd] cmd/compile, simd: adjust Shift.* operations

This CL does the following:

1. Removes the ShiftRightSignExtended operations; signed vectors now
   default to arithmetic shifts and unsigned vectors to logical shifts.
2. Adds the missing Shift.* operations that were left out due to a YAML
   error in the generator.

(See the illustrative sketch after the diff for the resulting
signed/unsigned shift semantics.)

This CL is generated by CL 687595.

Change-Id: I663115498adb91c82e89a8476e6748794e997cfa
Reviewed-on: https://go-review.googlesource.com/c/go/+/687596
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
Reviewed-by: Cherry Mui
---
 src/cmd/compile/internal/amd64/simdssa.go     |  128 +-
 .../compile/internal/ssa/_gen/simdAMD64.rules |  134 +-
 .../compile/internal/ssa/_gen/simdAMD64ops.go |   98 +-
 .../internal/ssa/_gen/simdgenericOps.go       |   78 +-
 src/cmd/compile/internal/ssa/opGen.go         | 1688 ++++++++++-------
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 1025 +++++-----
 .../compile/internal/ssagen/simdintrinsics.go |   78 +-
 src/simd/ops_amd64.go                         |  490 ++---
 src/simd/simd_wrapped_test.go                 |   74 -
 9 files changed, 1984 insertions(+), 1809 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 12a8c857bd..e2d0dd17c6 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -273,15 +273,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSLLVQ128, ssa.OpAMD64VPSLLVQ256, ssa.OpAMD64VPSLLVQ512, - ssa.OpAMD64VPSRLVW128, - ssa.OpAMD64VPSRLVW256, - ssa.OpAMD64VPSRLVW512, - ssa.OpAMD64VPSRLVD128, - ssa.OpAMD64VPSRLVD256, - ssa.OpAMD64VPSRLVD512, - ssa.OpAMD64VPSRLVQ128, - ssa.OpAMD64VPSRLVQ256, - ssa.OpAMD64VPSRLVQ512, ssa.OpAMD64VPSRAVW128, ssa.OpAMD64VPSRAVW256, ssa.OpAMD64VPSRAVW512,
@@ -291,6 +282,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSRAVD128, ssa.OpAMD64VPSRAVD256, ssa.OpAMD64VPSRAVD512, ssa.OpAMD64VPSRAVQ128, ssa.OpAMD64VPSRAVQ256, ssa.OpAMD64VPSRAVQ512, + ssa.OpAMD64VPSRLVW128, + ssa.OpAMD64VPSRLVW256, + ssa.OpAMD64VPSRLVW512, + ssa.OpAMD64VPSRLVD128, + ssa.OpAMD64VPSRLVD256, + ssa.OpAMD64VPSRLVD512, + ssa.OpAMD64VPSRLVQ128, + ssa.OpAMD64VPSRLVQ256, + ssa.OpAMD64VPSRLVQ512, ssa.OpAMD64VPSIGNB128, ssa.OpAMD64VPSIGNB256, ssa.OpAMD64VPSIGNW128, ssa.OpAMD64VPSIGNW256,
@@ -504,15 +504,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSLLVQMasked128, ssa.OpAMD64VPSLLVQMasked256, ssa.OpAMD64VPSLLVQMasked512, - ssa.OpAMD64VPSRLVWMasked128, - ssa.OpAMD64VPSRLVWMasked256, - ssa.OpAMD64VPSRLVWMasked512, - ssa.OpAMD64VPSRLVDMasked128, - ssa.OpAMD64VPSRLVDMasked256, - ssa.OpAMD64VPSRLVDMasked512, - ssa.OpAMD64VPSRLVQMasked128, - ssa.OpAMD64VPSRLVQMasked256, - ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VPSRAVWMasked128, ssa.OpAMD64VPSRAVWMasked256, ssa.OpAMD64VPSRAVWMasked512,
@@ -522,6 +513,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSRAVDMasked128, ssa.OpAMD64VPSRAVDMasked256, ssa.OpAMD64VPSRAVDMasked512, ssa.OpAMD64VPSRAVQMasked128, ssa.OpAMD64VPSRAVQMasked256, ssa.OpAMD64VPSRAVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VSUBPSMasked128, ssa.OpAMD64VSUBPSMasked256, ssa.OpAMD64VSUBPSMasked512,
@@ -845,36 +845,60 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { case ssa.OpAMD64VPSLLW128, ssa.OpAMD64VPSLLW256, + ssa.OpAMD64VPSLLW512, ssa.OpAMD64VPSLLD128, ssa.OpAMD64VPSLLD256, + ssa.OpAMD64VPSLLD512,
ssa.OpAMD64VPSLLQ128, ssa.OpAMD64VPSLLQ256, ssa.OpAMD64VPSLLQ512, - ssa.OpAMD64VPSRLW128, - ssa.OpAMD64VPSRLW256, - ssa.OpAMD64VPSRLD128, - ssa.OpAMD64VPSRLD256, - ssa.OpAMD64VPSRLQ128, - ssa.OpAMD64VPSRLQ256, - ssa.OpAMD64VPSRLQ512, ssa.OpAMD64VPSRAW128, ssa.OpAMD64VPSRAW256, + ssa.OpAMD64VPSRAW512, ssa.OpAMD64VPSRAD128, ssa.OpAMD64VPSRAD256, + ssa.OpAMD64VPSRAD512, ssa.OpAMD64VPSRAQ128, ssa.OpAMD64VPSRAQ256, - ssa.OpAMD64VPSRAQ512: + ssa.OpAMD64VPSRAQ512, + ssa.OpAMD64VPSRLW128, + ssa.OpAMD64VPSRLW256, + ssa.OpAMD64VPSRLW512, + ssa.OpAMD64VPSRLD128, + ssa.OpAMD64VPSRLD256, + ssa.OpAMD64VPSRLD512, + ssa.OpAMD64VPSRLQ128, + ssa.OpAMD64VPSRLQ256, + ssa.OpAMD64VPSRLQ512: p = simdVfpv(s, v) - case ssa.OpAMD64VPSLLQMasked128, + case ssa.OpAMD64VPSLLWMasked128, + ssa.OpAMD64VPSLLWMasked256, + ssa.OpAMD64VPSLLWMasked512, + ssa.OpAMD64VPSLLDMasked128, + ssa.OpAMD64VPSLLDMasked256, + ssa.OpAMD64VPSLLDMasked512, + ssa.OpAMD64VPSLLQMasked128, ssa.OpAMD64VPSLLQMasked256, ssa.OpAMD64VPSLLQMasked512, - ssa.OpAMD64VPSRLQMasked128, - ssa.OpAMD64VPSRLQMasked256, - ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSRAWMasked128, + ssa.OpAMD64VPSRAWMasked256, + ssa.OpAMD64VPSRAWMasked512, + ssa.OpAMD64VPSRADMasked128, + ssa.OpAMD64VPSRADMasked256, + ssa.OpAMD64VPSRADMasked512, ssa.OpAMD64VPSRAQMasked128, ssa.OpAMD64VPSRAQMasked256, - ssa.OpAMD64VPSRAQMasked512: + ssa.OpAMD64VPSRAQMasked512, + ssa.OpAMD64VPSRLWMasked128, + ssa.OpAMD64VPSRLWMasked256, + ssa.OpAMD64VPSRLWMasked512, + ssa.OpAMD64VPSRLDMasked128, + ssa.OpAMD64VPSRLDMasked256, + ssa.OpAMD64VPSRLDMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512: p = simdVfpkv(s, v) case ssa.OpAMD64VPINSRB128, @@ -1198,6 +1222,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHLDQMasked128, ssa.OpAMD64VPSHLDQMasked256, ssa.OpAMD64VPSHLDQMasked512, + ssa.OpAMD64VPSLLWMasked128, + ssa.OpAMD64VPSLLWMasked256, + ssa.OpAMD64VPSLLWMasked512, + ssa.OpAMD64VPSLLDMasked128, + ssa.OpAMD64VPSLLDMasked256, + ssa.OpAMD64VPSLLDMasked512, ssa.OpAMD64VPSLLQMasked128, ssa.OpAMD64VPSLLQMasked256, ssa.OpAMD64VPSLLQMasked512, @@ -1210,12 +1240,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHRDQMasked128, ssa.OpAMD64VPSHRDQMasked256, ssa.OpAMD64VPSHRDQMasked512, - ssa.OpAMD64VPSRLQMasked128, - ssa.OpAMD64VPSRLQMasked256, - ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSRAWMasked128, + ssa.OpAMD64VPSRAWMasked256, + ssa.OpAMD64VPSRAWMasked512, + ssa.OpAMD64VPSRADMasked128, + ssa.OpAMD64VPSRADMasked256, + ssa.OpAMD64VPSRADMasked512, ssa.OpAMD64VPSRAQMasked128, ssa.OpAMD64VPSRAQMasked256, ssa.OpAMD64VPSRAQMasked512, + ssa.OpAMD64VPSRLWMasked128, + ssa.OpAMD64VPSRLWMasked256, + ssa.OpAMD64VPSRLWMasked512, + ssa.OpAMD64VPSRLDMasked128, + ssa.OpAMD64VPSRLDMasked256, + ssa.OpAMD64VPSRLDMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512, ssa.OpAMD64VPSHLDVWMasked128, ssa.OpAMD64VPSHLDVWMasked256, ssa.OpAMD64VPSHLDVWMasked512, @@ -1243,15 +1285,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHRDVQMasked128, ssa.OpAMD64VPSHRDVQMasked256, ssa.OpAMD64VPSHRDVQMasked512, - ssa.OpAMD64VPSRLVWMasked128, - ssa.OpAMD64VPSRLVWMasked256, - ssa.OpAMD64VPSRLVWMasked512, - ssa.OpAMD64VPSRLVDMasked128, - ssa.OpAMD64VPSRLVDMasked256, - ssa.OpAMD64VPSRLVDMasked512, - ssa.OpAMD64VPSRLVQMasked128, - ssa.OpAMD64VPSRLVQMasked256, - ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VPSRAVWMasked128, ssa.OpAMD64VPSRAVWMasked256, 
ssa.OpAMD64VPSRAVWMasked512, @@ -1261,6 +1294,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSRAVQMasked128, ssa.OpAMD64VPSRAVQMasked256, ssa.OpAMD64VPSRAVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VSQRTPSMasked128, ssa.OpAMD64VSQRTPSMasked256, ssa.OpAMD64VSQRTPSMasked512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 7ac4df5958..6043edad70 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1239,15 +1239,19 @@ (SetElemUint64x2 ...) => (VPINSRQ128 ...) (ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...) (ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftInt16x32 ...) => (VPSLLW512 ...) (ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...) (ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftInt32x16 ...) => (VPSLLD512 ...) (ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...) (ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...) (ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...) (ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...) (ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftUint16x32 ...) => (VPSLLW512 ...) (ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...) (ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftUint32x16 ...) => (VPSLLD512 ...) (ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...) (ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...) (ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...) @@ -1287,23 +1291,39 @@ (ShiftAllLeftAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) (ShiftAllLeftAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) (ShiftAllLeftAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) (ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) (ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) (ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM 
mask)) (ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightInt16x8 ...) => (VPSRLW128 ...) -(ShiftAllRightInt16x16 ...) => (VPSRLW256 ...) -(ShiftAllRightInt32x4 ...) => (VPSRLD128 ...) -(ShiftAllRightInt32x8 ...) => (VPSRLD256 ...) -(ShiftAllRightInt64x2 ...) => (VPSRLQ128 ...) -(ShiftAllRightInt64x4 ...) => (VPSRLQ256 ...) -(ShiftAllRightInt64x8 ...) => (VPSRLQ512 ...) +(ShiftAllRightInt16x8 ...) => (VPSRAW128 ...) +(ShiftAllRightInt16x16 ...) => (VPSRAW256 ...) +(ShiftAllRightInt16x32 ...) => (VPSRAW512 ...) +(ShiftAllRightInt32x4 ...) => (VPSRAD128 ...) +(ShiftAllRightInt32x8 ...) => (VPSRAD256 ...) +(ShiftAllRightInt32x16 ...) => (VPSRAD512 ...) +(ShiftAllRightInt64x2 ...) => (VPSRAQ128 ...) +(ShiftAllRightInt64x4 ...) => (VPSRAQ256 ...) +(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...) (ShiftAllRightUint16x8 ...) => (VPSRLW128 ...) (ShiftAllRightUint16x16 ...) => (VPSRLW256 ...) +(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...) (ShiftAllRightUint32x4 ...) => (VPSRLD128 ...) (ShiftAllRightUint32x8 ...) => (VPSRLD256 ...) +(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...) (ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) (ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) (ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) @@ -1343,22 +1363,24 @@ (ShiftAllRightAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) (ShiftAllRightAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) (ShiftAllRightAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) (ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) (ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightSignExtendedInt16x8 ...) => (VPSRAW128 ...) 
-(ShiftAllRightSignExtendedInt16x16 ...) => (VPSRAW256 ...) -(ShiftAllRightSignExtendedInt32x4 ...) => (VPSRAD128 ...) -(ShiftAllRightSignExtendedInt32x8 ...) => (VPSRAD256 ...) -(ShiftAllRightSignExtendedInt64x2 ...) => (VPSRAQ128 ...) -(ShiftAllRightSignExtendedInt64x4 ...) => (VPSRAQ256 ...) -(ShiftAllRightSignExtendedInt64x8 ...) => (VPSRAQ512 ...) -(ShiftAllRightSignExtendedMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightSignExtendedMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightSignExtendedMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) @@ -1431,15 +1453,15 @@ (ShiftLeftMaskedUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftLeftMaskedUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) (ShiftLeftMaskedUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftRightInt16x8 ...) => (VPSRLVW128 ...) -(ShiftRightInt16x16 ...) => (VPSRLVW256 ...) -(ShiftRightInt16x32 ...) => (VPSRLVW512 ...) -(ShiftRightInt32x4 ...) => (VPSRLVD128 ...) -(ShiftRightInt32x8 ...) => (VPSRLVD256 ...) -(ShiftRightInt32x16 ...) => (VPSRLVD512 ...) -(ShiftRightInt64x2 ...) => (VPSRLVQ128 ...) -(ShiftRightInt64x4 ...) => (VPSRLVQ256 ...) -(ShiftRightInt64x8 ...) => (VPSRLVQ512 ...) +(ShiftRightInt16x8 ...) => (VPSRAVW128 ...) +(ShiftRightInt16x16 ...) => (VPSRAVW256 ...) +(ShiftRightInt16x32 ...) => (VPSRAVW512 ...) +(ShiftRightInt32x4 ...) => (VPSRAVD128 ...) +(ShiftRightInt32x8 ...) => (VPSRAVD256 ...) +(ShiftRightInt32x16 ...) => (VPSRAVD512 ...) +(ShiftRightInt64x2 ...) => (VPSRAVQ128 ...) +(ShiftRightInt64x4 ...) => (VPSRAVQ256 ...) +(ShiftRightInt64x8 ...) => (VPSRAVQ512 ...) (ShiftRightUint16x8 ...) => (VPSRLVW128 ...) (ShiftRightUint16x16 ...) => (VPSRLVW256 ...) (ShiftRightUint16x32 ...) => (VPSRLVW512 ...) 
@@ -1485,15 +1507,15 @@ (ShiftRightAndFillUpperFromMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) (ShiftRightAndFillUpperFromMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) (ShiftRightAndFillUpperFromMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftRightMaskedInt16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightMaskedInt16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightMaskedInt16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightMaskedInt32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightMaskedInt32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightMaskedInt32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightMaskedInt64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightMaskedInt64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightMaskedInt64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftRightMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftRightMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftRightMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftRightMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftRightMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftRightMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftRightMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftRightMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftRightMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftRightMaskedUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) (ShiftRightMaskedUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) (ShiftRightMaskedUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) @@ -1503,42 +1525,6 @@ (ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) (ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftRightSignExtendedInt16x8 ...) => (VPSRAVW128 ...) -(ShiftRightSignExtendedInt16x16 ...) => (VPSRAVW256 ...) -(ShiftRightSignExtendedInt16x32 ...) => (VPSRAVW512 ...) -(ShiftRightSignExtendedInt32x4 ...) => (VPSRAVD128 ...) -(ShiftRightSignExtendedInt32x8 ...) => (VPSRAVD256 ...) -(ShiftRightSignExtendedInt32x16 ...) => (VPSRAVD512 ...) -(ShiftRightSignExtendedInt64x2 ...) => (VPSRAVQ128 ...) -(ShiftRightSignExtendedInt64x4 ...) => (VPSRAVQ256 ...) -(ShiftRightSignExtendedInt64x8 ...) => (VPSRAVQ512 ...) -(ShiftRightSignExtendedUint16x8 ...) => (VPSRAVW128 ...) -(ShiftRightSignExtendedUint16x16 ...) => (VPSRAVW256 ...) -(ShiftRightSignExtendedUint16x32 ...) => (VPSRAVW512 ...) -(ShiftRightSignExtendedUint32x4 ...) => (VPSRAVD128 ...) -(ShiftRightSignExtendedUint32x8 ...) => (VPSRAVD256 ...) -(ShiftRightSignExtendedUint32x16 ...) => (VPSRAVD512 ...) -(ShiftRightSignExtendedUint64x2 ...) => (VPSRAVQ128 ...) -(ShiftRightSignExtendedUint64x4 ...) => (VPSRAVQ256 ...) -(ShiftRightSignExtendedUint64x8 ...) => (VPSRAVQ512 ...) 
-(ShiftRightSignExtendedMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightSignExtendedMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightSignExtendedMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightSignExtendedMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightSignExtendedMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightSignExtendedMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightSignExtendedMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightSignExtendedMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightSignExtendedMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftRightSignExtendedMaskedUint16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightSignExtendedMaskedUint16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightSignExtendedMaskedUint16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightSignExtendedMaskedUint32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightSignExtendedMaskedUint32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightSignExtendedMaskedUint32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightSignExtendedMaskedUint64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightSignExtendedMaskedUint64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightSignExtendedMaskedUint64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) (SignInt8x16 ...) => (VPSIGNB128 ...) (SignInt8x32 ...) => (VPSIGNB256 ...) (SignInt16x8 ...) => (VPSIGNW128 ...) 
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index f0a149f7d8..3f777db5b7 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -198,17 +198,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPSUBSW256", argLength: 2, reg: v21, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLW256", argLength: 2, reg: vfpv, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLW256", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLWMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAW256", argLength: 2, reg: vfpv, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAWMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVW256", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVW256", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHLDVWMasked256", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLVW256", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVW256", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVW256", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHRDVWMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRAVW256", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGNW256", argLength: 2, reg: v21, asm: "VPSIGNW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBW256", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -233,15 +232,17 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPADDSWMasked512", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSW512", argLength: 2, reg: w21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLW512", argLength: 2, reg: wfpw, asm: "VPSLLW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLWMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAW512", argLength: 2, reg: wfpw, 
asm: "VPSRAW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAWMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLVW512", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVW512", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHLDVWMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSLLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLVW512", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVW512", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVW512", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHRDVWMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRAVW512", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBW512", argLength: 2, reg: w21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -272,17 +273,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPSUBSW128", argLength: 2, reg: v21, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLW128", argLength: 2, reg: vfpv, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLW128", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLWMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAW128", argLength: 2, reg: vfpv, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAWMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVW128", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVW128", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHLDVWMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLVW128", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVW128", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVW128", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: 
"Vec128", resultInArg0: true}, {name: "VPSHRDVWMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRAVW128", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGNW128", argLength: 2, reg: v21, asm: "VPSIGNW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBW128", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -315,15 +315,17 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPDPWSSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDS512", argLength: 3, reg: w31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLD512", argLength: 2, reg: wfpw, asm: "VPSLLD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLDMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAD512", argLength: 2, reg: wfpw, asm: "VPSRAD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRADMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLVD512", argLength: 2, reg: w21, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVD512", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHLDVDMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSLLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLVD512", argLength: 2, reg: w21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVD512", argLength: 2, reg: w21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVD512", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHRDVDMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRAVD512", argLength: 2, reg: w21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBD512", argLength: 2, reg: w21, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBDMasked512", argLength: 3, reg: w2kw, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -362,17 +364,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPDPBUSDS128", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: 
"VPDPBUSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLD128", argLength: 2, reg: vfpv, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAD128", argLength: 2, reg: vfpv, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRADMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVD128", argLength: 2, reg: v21, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVD128", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHLDVDMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVD128", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVD128", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHRDVDMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRAVD128", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGND128", argLength: 2, reg: v21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBD128", argLength: 2, reg: v21, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -411,17 +412,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPDPBUSDS256", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPBUSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLD256", argLength: 2, reg: vfpv, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLDMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAD256", argLength: 2, reg: vfpv, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRADMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVD256", argLength: 2, reg: v21, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVD256", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHLDVDMasked256", argLength: 4, reg: w3kw, 
asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLVD256", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVD256", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVD256", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHRDVDMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRAVD256", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGND256", argLength: 2, reg: v21, asm: "VPSIGND", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBD256", argLength: 2, reg: v21, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -453,19 +453,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPRORVQMasked128", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLQ128", argLength: 2, reg: vfpv, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAQ128", argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVQ128", argLength: 2, reg: v21, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVQ128", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHLDVQMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVQ128", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVQ128", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHRDVQMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRAVQ128", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, 
typ: "Vec128", resultInArg0: false}, {name: "VPSUBQ128", argLength: 2, reg: v21, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBQMasked128", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -494,19 +490,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPRORVQMasked256", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLQ256", argLength: 2, reg: vfpv, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAQ256", argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVQ256", argLength: 2, reg: v21, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVQ256", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHLDVQMasked256", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVQ256", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVQ256", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHRDVQMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRAVQ256", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQ256", argLength: 2, reg: v21, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQMasked256", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -537,19 +529,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPRORVQMasked512", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLQ512", argLength: 2, reg: wfpw, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAQ512", 
argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLVQ512", argLength: 2, reg: w21, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVQ512", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHLDVQMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSLLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVQ512", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVQ512", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHRDVQMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRAVQ512", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQ512", argLength: 2, reg: w21, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQMasked512", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -625,6 +613,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUW256", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLW256", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLWMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVW256", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPAVGW512", argLength: 2, reg: w21, asm: "VPAVGW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPAVGWMasked512", argLength: 3, reg: w2kw, asm: "VPAVGW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUW512", argLength: 2, reg: w21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -633,6 +625,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUW512", argLength: 2, reg: w21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: 
true, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLW512", argLength: 2, reg: wfpw, asm: "VPSRLW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLWMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVW512", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPAVGW128", argLength: 2, reg: v21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGWMasked128", argLength: 3, reg: w2kw, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUW128", argLength: 2, reg: v21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -641,36 +637,64 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLW128", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLWMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVW128", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUD512", argLength: 2, reg: w21, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUD512", argLength: 2, reg: w21, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUDMasked512", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLD512", argLength: 2, reg: wfpw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLDMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVD512", argLength: 2, reg: w21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUD128", argLength: 2, reg: v21, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUDMasked128", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUD128", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUDMasked128", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, + 
{name: "VPSRLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUD256", argLength: 2, reg: v21, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUDMasked256", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUD256", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUDMasked256", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLDMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVD256", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUQ128", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUQMasked128", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUQ128", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUQMasked128", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULUDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUQ256", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUQMasked256", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUQ256", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUQMasked256", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: 
"VPSRLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUQ512", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUQMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUQ512", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUQMasked512", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULUDQ512", argLength: 2, reg: w21, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULUDQMasked512", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPAVGB128", argLength: 2, reg: v21, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGBMasked128", argLength: 3, reg: w2kw, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VGF2P8MULB128", argLength: 2, reg: w21, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index d07472b876..1180d32586 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -312,8 +312,9 @@ func simdGenericOps() []opData { {name: "SaturatedSubInt16x16", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftAllRightInt16x16", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftLeftInt16x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false}, @@ -322,8 +323,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt16x16", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt16x16", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt16x16", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt16x16", argLength: 3, commutative: false}, {name: "SignInt16x16", argLength: 2, commutative: false}, {name: "SubInt16x16", argLength: 2, commutative: false}, {name: "SubMaskedInt16x16", argLength: 3, commutative: false}, @@ -360,6 +359,10 @@ func simdGenericOps() []opData { {name: "SaturatedAddMaskedInt16x32", argLength: 3, commutative: true}, {name: "SaturatedSubInt16x32", argLength: 2, 
commutative: false}, {name: "SaturatedSubMaskedInt16x32", argLength: 3, commutative: false}, + {name: "ShiftAllLeftInt16x32", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt16x32", argLength: 3, commutative: false}, + {name: "ShiftAllRightInt16x32", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt16x32", argLength: 3, commutative: false}, {name: "ShiftLeftInt16x32", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false}, @@ -368,8 +371,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt16x32", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt16x32", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt16x32", argLength: 3, commutative: false}, {name: "SubInt16x32", argLength: 2, commutative: false}, {name: "SubMaskedInt16x32", argLength: 3, commutative: false}, {name: "AbsoluteInt16x8", argLength: 1, commutative: false}, @@ -412,8 +413,9 @@ func simdGenericOps() []opData { {name: "SaturatedSubInt16x8", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftAllRightInt16x8", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftLeftInt16x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false}, @@ -422,8 +424,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt16x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt16x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt16x8", argLength: 3, commutative: false}, {name: "SignInt16x8", argLength: 2, commutative: false}, {name: "SubInt16x8", argLength: 2, commutative: false}, {name: "SubMaskedInt16x8", argLength: 3, commutative: false}, @@ -468,6 +468,10 @@ func simdGenericOps() []opData { {name: "SaturatedPairDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, + {name: "ShiftAllLeftInt32x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt32x16", argLength: 3, commutative: false}, + {name: "ShiftAllRightInt32x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt32x16", argLength: 3, commutative: false}, {name: "ShiftLeftInt32x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false}, @@ -476,8 +480,6 @@ func simdGenericOps() []opData { {name: 
"ShiftRightAndFillUpperFromInt32x16", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt32x16", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt32x16", argLength: 3, commutative: false}, {name: "SubInt32x16", argLength: 2, commutative: false}, {name: "SubMaskedInt32x16", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, @@ -528,8 +530,9 @@ func simdGenericOps() []opData { {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false}, {name: "ShiftAllLeftInt32x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt32x4", argLength: 3, commutative: false}, {name: "ShiftAllRightInt32x4", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedInt32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt32x4", argLength: 3, commutative: false}, {name: "ShiftLeftInt32x4", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false}, @@ -538,8 +541,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt32x4", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt32x4", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt32x4", argLength: 3, commutative: false}, {name: "SignInt32x4", argLength: 2, commutative: false}, {name: "SubInt32x4", argLength: 2, commutative: false}, {name: "SubMaskedInt32x4", argLength: 3, commutative: false}, @@ -591,8 +592,9 @@ func simdGenericOps() []opData { {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false}, {name: "ShiftAllLeftInt32x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt32x8", argLength: 3, commutative: false}, {name: "ShiftAllRightInt32x8", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedInt32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt32x8", argLength: 3, commutative: false}, {name: "ShiftLeftInt32x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false}, @@ -601,8 +603,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt32x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt32x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt32x8", argLength: 3, commutative: false}, {name: "SignInt32x8", argLength: 2, commutative: false}, {name: "SubInt32x8", argLength: 2, commutative: false}, {name: "SubMaskedInt32x8", argLength: 3, commutative: false}, @@ -650,8 +650,6 @@ func simdGenericOps() 
[]opData { {name: "ShiftAllLeftMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false}, {name: "ShiftAllRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftAllRightSignExtendedInt64x2", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x2", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false}, @@ -660,8 +658,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt64x2", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt64x2", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt64x2", argLength: 3, commutative: false}, {name: "SubInt64x2", argLength: 2, commutative: false}, {name: "SubMaskedInt64x2", argLength: 3, commutative: false}, {name: "XorInt64x2", argLength: 2, commutative: true}, @@ -706,8 +702,6 @@ func simdGenericOps() []opData { {name: "ShiftAllLeftMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false}, {name: "ShiftAllRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightSignExtendedInt64x4", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x4", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false}, @@ -716,8 +710,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt64x4", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt64x4", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt64x4", argLength: 3, commutative: false}, {name: "SubInt64x4", argLength: 2, commutative: false}, {name: "SubMaskedInt64x4", argLength: 3, commutative: false}, {name: "XorInt64x4", argLength: 2, commutative: true}, @@ -762,8 +754,6 @@ func simdGenericOps() []opData { {name: "ShiftAllLeftMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false}, {name: "ShiftAllRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightSignExtendedInt64x8", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false}, @@ -772,8 +762,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt64x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt64x8", argLength: 2, commutative: false}, - {name: 
"ShiftRightSignExtendedMaskedInt64x8", argLength: 3, commutative: false}, {name: "SubInt64x8", argLength: 2, commutative: false}, {name: "SubMaskedInt64x8", argLength: 3, commutative: false}, {name: "XorInt64x8", argLength: 2, commutative: true}, @@ -906,7 +894,9 @@ func simdGenericOps() []opData { {name: "SaturatedSubUint16x16", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftLeftUint16x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false}, @@ -915,8 +905,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint16x16", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint16x16", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint16x16", argLength: 3, commutative: false}, {name: "SubUint16x16", argLength: 2, commutative: false}, {name: "SubMaskedUint16x16", argLength: 3, commutative: false}, {name: "XorUint16x16", argLength: 2, commutative: true}, @@ -948,6 +936,10 @@ func simdGenericOps() []opData { {name: "SaturatedAddMaskedUint16x32", argLength: 3, commutative: true}, {name: "SaturatedSubUint16x32", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedUint16x32", argLength: 3, commutative: false}, + {name: "ShiftAllLeftUint16x32", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint16x32", argLength: 3, commutative: false}, + {name: "ShiftAllRightUint16x32", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint16x32", argLength: 3, commutative: false}, {name: "ShiftLeftUint16x32", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false}, @@ -956,8 +948,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint16x32", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint16x32", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint16x32", argLength: 3, commutative: false}, {name: "SubUint16x32", argLength: 2, commutative: false}, {name: "SubMaskedUint16x32", argLength: 3, commutative: false}, {name: "AddUint16x8", argLength: 2, commutative: true}, @@ -994,7 +984,9 @@ func simdGenericOps() []opData { {name: "SaturatedSubUint16x8", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedUint16x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftUint16x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint16x8", argLength: 3, commutative: false}, {name: "ShiftAllRightUint16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint16x8", argLength: 3, commutative: false}, {name: "ShiftLeftUint16x8", argLength: 2, commutative: false}, {name: 
"ShiftLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false}, @@ -1003,8 +995,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint16x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint16x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint16x8", argLength: 3, commutative: false}, {name: "SubUint16x8", argLength: 2, commutative: false}, {name: "SubMaskedUint16x8", argLength: 3, commutative: false}, {name: "XorUint16x8", argLength: 2, commutative: true}, @@ -1040,6 +1030,10 @@ func simdGenericOps() []opData { {name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false}, + {name: "ShiftAllLeftUint32x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false}, + {name: "ShiftAllRightUint32x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint32x16", argLength: 3, commutative: false}, {name: "ShiftLeftUint32x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false}, @@ -1048,8 +1042,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint32x16", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint32x16", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint32x16", argLength: 3, commutative: false}, {name: "SubUint32x16", argLength: 2, commutative: false}, {name: "SubMaskedUint32x16", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false}, @@ -1092,7 +1084,9 @@ func simdGenericOps() []opData { {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false}, {name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false}, {name: "ShiftAllRightUint32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint32x4", argLength: 3, commutative: false}, {name: "ShiftLeftUint32x4", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false}, @@ -1101,8 +1095,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint32x4", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint32x4", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint32x4", argLength: 3, commutative: false}, {name: "SubUint32x4", 
argLength: 2, commutative: false}, {name: "SubMaskedUint32x4", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false}, @@ -1145,7 +1137,9 @@ func simdGenericOps() []opData { {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false}, {name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false}, {name: "ShiftAllRightUint32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint32x8", argLength: 3, commutative: false}, {name: "ShiftLeftUint32x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false}, @@ -1154,8 +1148,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint32x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint32x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint32x8", argLength: 3, commutative: false}, {name: "SubUint32x8", argLength: 2, commutative: false}, {name: "SubMaskedUint32x8", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false}, @@ -1206,8 +1198,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint64x2", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint64x2", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint64x2", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint64x2", argLength: 3, commutative: false}, {name: "SubUint64x2", argLength: 2, commutative: false}, {name: "SubMaskedUint64x2", argLength: 3, commutative: false}, {name: "XorUint64x2", argLength: 2, commutative: true}, @@ -1256,8 +1246,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint64x4", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint64x4", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint64x4", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint64x4", argLength: 3, commutative: false}, {name: "SubUint64x4", argLength: 2, commutative: false}, {name: "SubMaskedUint64x4", argLength: 3, commutative: false}, {name: "XorUint64x4", argLength: 2, commutative: true}, @@ -1306,8 +1294,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint64x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint64x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint64x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint64x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint64x8", argLength: 3, commutative: false}, {name: "SubUint64x8", argLength: 2, commutative: false}, {name: "SubMaskedUint64x8", argLength: 3, commutative: false}, {name: "XorUint64x8", argLength: 2, commutative: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go 
b/src/cmd/compile/internal/ssa/opGen.go index d5c5085949..9067023f3a 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1393,17 +1393,16 @@ const ( OpAMD64VPSUBSW256 OpAMD64VPSUBSWMasked256 OpAMD64VPSLLW256 - OpAMD64VPSRLW256 + OpAMD64VPSLLWMasked256 OpAMD64VPSRAW256 + OpAMD64VPSRAWMasked256 OpAMD64VPSLLVW256 OpAMD64VPSHLDVW256 OpAMD64VPSHLDVWMasked256 OpAMD64VPSLLVWMasked256 - OpAMD64VPSRLVW256 + OpAMD64VPSRAVW256 OpAMD64VPSHRDVW256 OpAMD64VPSHRDVWMasked256 - OpAMD64VPSRLVWMasked256 - OpAMD64VPSRAVW256 OpAMD64VPSRAVWMasked256 OpAMD64VPSIGNW256 OpAMD64VPSUBW256 @@ -1428,15 +1427,17 @@ const ( OpAMD64VPADDSWMasked512 OpAMD64VPSUBSW512 OpAMD64VPSUBSWMasked512 + OpAMD64VPSLLW512 + OpAMD64VPSLLWMasked512 + OpAMD64VPSRAW512 + OpAMD64VPSRAWMasked512 OpAMD64VPSLLVW512 OpAMD64VPSHLDVW512 OpAMD64VPSHLDVWMasked512 OpAMD64VPSLLVWMasked512 - OpAMD64VPSRLVW512 + OpAMD64VPSRAVW512 OpAMD64VPSHRDVW512 OpAMD64VPSHRDVWMasked512 - OpAMD64VPSRLVWMasked512 - OpAMD64VPSRAVW512 OpAMD64VPSRAVWMasked512 OpAMD64VPSUBW512 OpAMD64VPSUBWMasked512 @@ -1467,17 +1468,16 @@ const ( OpAMD64VPSUBSW128 OpAMD64VPSUBSWMasked128 OpAMD64VPSLLW128 - OpAMD64VPSRLW128 + OpAMD64VPSLLWMasked128 OpAMD64VPSRAW128 + OpAMD64VPSRAWMasked128 OpAMD64VPSLLVW128 OpAMD64VPSHLDVW128 OpAMD64VPSHLDVWMasked128 OpAMD64VPSLLVWMasked128 - OpAMD64VPSRLVW128 + OpAMD64VPSRAVW128 OpAMD64VPSHRDVW128 OpAMD64VPSHRDVWMasked128 - OpAMD64VPSRLVWMasked128 - OpAMD64VPSRAVW128 OpAMD64VPSRAVWMasked128 OpAMD64VPSIGNW128 OpAMD64VPSUBW128 @@ -1510,15 +1510,17 @@ const ( OpAMD64VPDPWSSDSMasked512 OpAMD64VPDPBUSDS512 OpAMD64VPDPBUSDSMasked512 + OpAMD64VPSLLD512 + OpAMD64VPSLLDMasked512 + OpAMD64VPSRAD512 + OpAMD64VPSRADMasked512 OpAMD64VPSLLVD512 OpAMD64VPSHLDVD512 OpAMD64VPSHLDVDMasked512 OpAMD64VPSLLVDMasked512 - OpAMD64VPSRLVD512 + OpAMD64VPSRAVD512 OpAMD64VPSHRDVD512 OpAMD64VPSHRDVDMasked512 - OpAMD64VPSRLVDMasked512 - OpAMD64VPSRAVD512 OpAMD64VPSRAVDMasked512 OpAMD64VPSUBD512 OpAMD64VPSUBDMasked512 @@ -1557,17 +1559,16 @@ const ( OpAMD64VPDPBUSDS128 OpAMD64VPDPBUSDSMasked128 OpAMD64VPSLLD128 - OpAMD64VPSRLD128 + OpAMD64VPSLLDMasked128 OpAMD64VPSRAD128 + OpAMD64VPSRADMasked128 OpAMD64VPSLLVD128 OpAMD64VPSHLDVD128 OpAMD64VPSHLDVDMasked128 OpAMD64VPSLLVDMasked128 - OpAMD64VPSRLVD128 + OpAMD64VPSRAVD128 OpAMD64VPSHRDVD128 OpAMD64VPSHRDVDMasked128 - OpAMD64VPSRLVDMasked128 - OpAMD64VPSRAVD128 OpAMD64VPSRAVDMasked128 OpAMD64VPSIGND128 OpAMD64VPSUBD128 @@ -1606,17 +1607,16 @@ const ( OpAMD64VPDPBUSDS256 OpAMD64VPDPBUSDSMasked256 OpAMD64VPSLLD256 - OpAMD64VPSRLD256 + OpAMD64VPSLLDMasked256 OpAMD64VPSRAD256 + OpAMD64VPSRADMasked256 OpAMD64VPSLLVD256 OpAMD64VPSHLDVD256 OpAMD64VPSHLDVDMasked256 OpAMD64VPSLLVDMasked256 - OpAMD64VPSRLVD256 + OpAMD64VPSRAVD256 OpAMD64VPSHRDVD256 OpAMD64VPSHRDVDMasked256 - OpAMD64VPSRLVDMasked256 - OpAMD64VPSRAVD256 OpAMD64VPSRAVDMasked256 OpAMD64VPSIGND256 OpAMD64VPSUBD256 @@ -1648,19 +1648,15 @@ const ( OpAMD64VPRORVQMasked128 OpAMD64VPSLLQ128 OpAMD64VPSLLQMasked128 - OpAMD64VPSRLQ128 - OpAMD64VPSRLQMasked128 OpAMD64VPSRAQ128 OpAMD64VPSRAQMasked128 OpAMD64VPSLLVQ128 OpAMD64VPSHLDVQ128 OpAMD64VPSHLDVQMasked128 OpAMD64VPSLLVQMasked128 - OpAMD64VPSRLVQ128 + OpAMD64VPSRAVQ128 OpAMD64VPSHRDVQ128 OpAMD64VPSHRDVQMasked128 - OpAMD64VPSRLVQMasked128 - OpAMD64VPSRAVQ128 OpAMD64VPSRAVQMasked128 OpAMD64VPSUBQ128 OpAMD64VPSUBQMasked128 @@ -1689,19 +1685,15 @@ const ( OpAMD64VPRORVQMasked256 OpAMD64VPSLLQ256 OpAMD64VPSLLQMasked256 - OpAMD64VPSRLQ256 - OpAMD64VPSRLQMasked256 OpAMD64VPSRAQ256 
OpAMD64VPSRAQMasked256 OpAMD64VPSLLVQ256 OpAMD64VPSHLDVQ256 OpAMD64VPSHLDVQMasked256 OpAMD64VPSLLVQMasked256 - OpAMD64VPSRLVQ256 + OpAMD64VPSRAVQ256 OpAMD64VPSHRDVQ256 OpAMD64VPSHRDVQMasked256 - OpAMD64VPSRLVQMasked256 - OpAMD64VPSRAVQ256 OpAMD64VPSRAVQMasked256 OpAMD64VPSUBQ256 OpAMD64VPSUBQMasked256 @@ -1732,19 +1724,15 @@ const ( OpAMD64VPRORVQMasked512 OpAMD64VPSLLQ512 OpAMD64VPSLLQMasked512 - OpAMD64VPSRLQ512 - OpAMD64VPSRLQMasked512 OpAMD64VPSRAQ512 OpAMD64VPSRAQMasked512 OpAMD64VPSLLVQ512 OpAMD64VPSHLDVQ512 OpAMD64VPSHLDVQMasked512 OpAMD64VPSLLVQMasked512 - OpAMD64VPSRLVQ512 + OpAMD64VPSRAVQ512 OpAMD64VPSHRDVQ512 OpAMD64VPSHRDVQMasked512 - OpAMD64VPSRLVQMasked512 - OpAMD64VPSRAVQ512 OpAMD64VPSRAVQMasked512 OpAMD64VPSUBQ512 OpAMD64VPSUBQMasked512 @@ -1820,6 +1808,10 @@ const ( OpAMD64VPMINUWMasked256 OpAMD64VPMULHUW256 OpAMD64VPMULHUWMasked256 + OpAMD64VPSRLW256 + OpAMD64VPSRLWMasked256 + OpAMD64VPSRLVW256 + OpAMD64VPSRLVWMasked256 OpAMD64VPAVGW512 OpAMD64VPAVGWMasked512 OpAMD64VPMAXUW512 @@ -1828,6 +1820,10 @@ const ( OpAMD64VPMINUWMasked512 OpAMD64VPMULHUW512 OpAMD64VPMULHUWMasked512 + OpAMD64VPSRLW512 + OpAMD64VPSRLWMasked512 + OpAMD64VPSRLVW512 + OpAMD64VPSRLVWMasked512 OpAMD64VPAVGW128 OpAMD64VPAVGWMasked128 OpAMD64VPMAXUW128 @@ -1836,36 +1832,64 @@ const ( OpAMD64VPMINUWMasked128 OpAMD64VPMULHUW128 OpAMD64VPMULHUWMasked128 + OpAMD64VPSRLW128 + OpAMD64VPSRLWMasked128 + OpAMD64VPSRLVW128 + OpAMD64VPSRLVWMasked128 OpAMD64VPMAXUD512 OpAMD64VPMAXUDMasked512 OpAMD64VPMINUD512 OpAMD64VPMINUDMasked512 + OpAMD64VPSRLD512 + OpAMD64VPSRLDMasked512 + OpAMD64VPSRLVD512 + OpAMD64VPSRLVDMasked512 OpAMD64VPMAXUD128 OpAMD64VPMAXUDMasked128 OpAMD64VPMINUD128 OpAMD64VPMINUDMasked128 OpAMD64VPMULUDQ128 + OpAMD64VPSRLD128 + OpAMD64VPSRLDMasked128 + OpAMD64VPSRLVD128 + OpAMD64VPSRLVDMasked128 OpAMD64VPMAXUD256 OpAMD64VPMAXUDMasked256 OpAMD64VPMINUD256 OpAMD64VPMINUDMasked256 OpAMD64VPMULUDQ256 + OpAMD64VPSRLD256 + OpAMD64VPSRLDMasked256 + OpAMD64VPSRLVD256 + OpAMD64VPSRLVDMasked256 OpAMD64VPMAXUQ128 OpAMD64VPMAXUQMasked128 OpAMD64VPMINUQ128 OpAMD64VPMINUQMasked128 OpAMD64VPMULUDQMasked128 + OpAMD64VPSRLQ128 + OpAMD64VPSRLQMasked128 + OpAMD64VPSRLVQ128 + OpAMD64VPSRLVQMasked128 OpAMD64VPMAXUQ256 OpAMD64VPMAXUQMasked256 OpAMD64VPMINUQ256 OpAMD64VPMINUQMasked256 OpAMD64VPMULUDQMasked256 + OpAMD64VPSRLQ256 + OpAMD64VPSRLQMasked256 + OpAMD64VPSRLVQ256 + OpAMD64VPSRLVQMasked256 OpAMD64VPMAXUQ512 OpAMD64VPMAXUQMasked512 OpAMD64VPMINUQ512 OpAMD64VPMINUQMasked512 OpAMD64VPMULUDQ512 OpAMD64VPMULUDQMasked512 + OpAMD64VPSRLQ512 + OpAMD64VPSRLQMasked512 + OpAMD64VPSRLVQ512 + OpAMD64VPSRLVQMasked512 OpAMD64VPAVGB128 OpAMD64VPAVGBMasked128 OpAMD64VGF2P8MULB128 @@ -4604,8 +4628,9 @@ const ( OpSaturatedSubInt16x16 OpSaturatedSubMaskedInt16x16 OpShiftAllLeftInt16x16 + OpShiftAllLeftMaskedInt16x16 OpShiftAllRightInt16x16 - OpShiftAllRightSignExtendedInt16x16 + OpShiftAllRightMaskedInt16x16 OpShiftLeftInt16x16 OpShiftLeftAndFillUpperFromInt16x16 OpShiftLeftAndFillUpperFromMaskedInt16x16 @@ -4614,8 +4639,6 @@ const ( OpShiftRightAndFillUpperFromInt16x16 OpShiftRightAndFillUpperFromMaskedInt16x16 OpShiftRightMaskedInt16x16 - OpShiftRightSignExtendedInt16x16 - OpShiftRightSignExtendedMaskedInt16x16 OpSignInt16x16 OpSubInt16x16 OpSubMaskedInt16x16 @@ -4652,6 +4675,10 @@ const ( OpSaturatedAddMaskedInt16x32 OpSaturatedSubInt16x32 OpSaturatedSubMaskedInt16x32 + OpShiftAllLeftInt16x32 + OpShiftAllLeftMaskedInt16x32 + OpShiftAllRightInt16x32 + OpShiftAllRightMaskedInt16x32 OpShiftLeftInt16x32 
OpShiftLeftAndFillUpperFromInt16x32 OpShiftLeftAndFillUpperFromMaskedInt16x32 @@ -4660,8 +4687,6 @@ const ( OpShiftRightAndFillUpperFromInt16x32 OpShiftRightAndFillUpperFromMaskedInt16x32 OpShiftRightMaskedInt16x32 - OpShiftRightSignExtendedInt16x32 - OpShiftRightSignExtendedMaskedInt16x32 OpSubInt16x32 OpSubMaskedInt16x32 OpAbsoluteInt16x8 @@ -4704,8 +4729,9 @@ const ( OpSaturatedSubInt16x8 OpSaturatedSubMaskedInt16x8 OpShiftAllLeftInt16x8 + OpShiftAllLeftMaskedInt16x8 OpShiftAllRightInt16x8 - OpShiftAllRightSignExtendedInt16x8 + OpShiftAllRightMaskedInt16x8 OpShiftLeftInt16x8 OpShiftLeftAndFillUpperFromInt16x8 OpShiftLeftAndFillUpperFromMaskedInt16x8 @@ -4714,8 +4740,6 @@ const ( OpShiftRightAndFillUpperFromInt16x8 OpShiftRightAndFillUpperFromMaskedInt16x8 OpShiftRightMaskedInt16x8 - OpShiftRightSignExtendedInt16x8 - OpShiftRightSignExtendedMaskedInt16x8 OpSignInt16x8 OpSubInt16x8 OpSubMaskedInt16x8 @@ -4760,6 +4784,10 @@ const ( OpSaturatedPairDotProdAccumulateMaskedInt32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 + OpShiftAllLeftInt32x16 + OpShiftAllLeftMaskedInt32x16 + OpShiftAllRightInt32x16 + OpShiftAllRightMaskedInt32x16 OpShiftLeftInt32x16 OpShiftLeftAndFillUpperFromInt32x16 OpShiftLeftAndFillUpperFromMaskedInt32x16 @@ -4768,8 +4796,6 @@ const ( OpShiftRightAndFillUpperFromInt32x16 OpShiftRightAndFillUpperFromMaskedInt32x16 OpShiftRightMaskedInt32x16 - OpShiftRightSignExtendedInt32x16 - OpShiftRightSignExtendedMaskedInt32x16 OpSubInt32x16 OpSubMaskedInt32x16 OpUnsignedSignedQuadDotProdAccumulateInt32x16 @@ -4820,8 +4846,9 @@ const ( OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 OpShiftAllLeftInt32x4 + OpShiftAllLeftMaskedInt32x4 OpShiftAllRightInt32x4 - OpShiftAllRightSignExtendedInt32x4 + OpShiftAllRightMaskedInt32x4 OpShiftLeftInt32x4 OpShiftLeftAndFillUpperFromInt32x4 OpShiftLeftAndFillUpperFromMaskedInt32x4 @@ -4830,8 +4857,6 @@ const ( OpShiftRightAndFillUpperFromInt32x4 OpShiftRightAndFillUpperFromMaskedInt32x4 OpShiftRightMaskedInt32x4 - OpShiftRightSignExtendedInt32x4 - OpShiftRightSignExtendedMaskedInt32x4 OpSignInt32x4 OpSubInt32x4 OpSubMaskedInt32x4 @@ -4883,8 +4908,9 @@ const ( OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 OpShiftAllLeftInt32x8 + OpShiftAllLeftMaskedInt32x8 OpShiftAllRightInt32x8 - OpShiftAllRightSignExtendedInt32x8 + OpShiftAllRightMaskedInt32x8 OpShiftLeftInt32x8 OpShiftLeftAndFillUpperFromInt32x8 OpShiftLeftAndFillUpperFromMaskedInt32x8 @@ -4893,8 +4919,6 @@ const ( OpShiftRightAndFillUpperFromInt32x8 OpShiftRightAndFillUpperFromMaskedInt32x8 OpShiftRightMaskedInt32x8 - OpShiftRightSignExtendedInt32x8 - OpShiftRightSignExtendedMaskedInt32x8 OpSignInt32x8 OpSubInt32x8 OpSubMaskedInt32x8 @@ -4942,8 +4966,6 @@ const ( OpShiftAllLeftMaskedInt64x2 OpShiftAllRightInt64x2 OpShiftAllRightMaskedInt64x2 - OpShiftAllRightSignExtendedInt64x2 - OpShiftAllRightSignExtendedMaskedInt64x2 OpShiftLeftInt64x2 OpShiftLeftAndFillUpperFromInt64x2 OpShiftLeftAndFillUpperFromMaskedInt64x2 @@ -4952,8 +4974,6 @@ const ( OpShiftRightAndFillUpperFromInt64x2 OpShiftRightAndFillUpperFromMaskedInt64x2 OpShiftRightMaskedInt64x2 - OpShiftRightSignExtendedInt64x2 - OpShiftRightSignExtendedMaskedInt64x2 OpSubInt64x2 OpSubMaskedInt64x2 OpXorInt64x2 @@ -4998,8 +5018,6 @@ const ( OpShiftAllLeftMaskedInt64x4 OpShiftAllRightInt64x4 OpShiftAllRightMaskedInt64x4 - 
OpShiftAllRightSignExtendedInt64x4 - OpShiftAllRightSignExtendedMaskedInt64x4 OpShiftLeftInt64x4 OpShiftLeftAndFillUpperFromInt64x4 OpShiftLeftAndFillUpperFromMaskedInt64x4 @@ -5008,8 +5026,6 @@ const ( OpShiftRightAndFillUpperFromInt64x4 OpShiftRightAndFillUpperFromMaskedInt64x4 OpShiftRightMaskedInt64x4 - OpShiftRightSignExtendedInt64x4 - OpShiftRightSignExtendedMaskedInt64x4 OpSubInt64x4 OpSubMaskedInt64x4 OpXorInt64x4 @@ -5054,8 +5070,6 @@ const ( OpShiftAllLeftMaskedInt64x8 OpShiftAllRightInt64x8 OpShiftAllRightMaskedInt64x8 - OpShiftAllRightSignExtendedInt64x8 - OpShiftAllRightSignExtendedMaskedInt64x8 OpShiftLeftInt64x8 OpShiftLeftAndFillUpperFromInt64x8 OpShiftLeftAndFillUpperFromMaskedInt64x8 @@ -5064,8 +5078,6 @@ const ( OpShiftRightAndFillUpperFromInt64x8 OpShiftRightAndFillUpperFromMaskedInt64x8 OpShiftRightMaskedInt64x8 - OpShiftRightSignExtendedInt64x8 - OpShiftRightSignExtendedMaskedInt64x8 OpSubInt64x8 OpSubMaskedInt64x8 OpXorInt64x8 @@ -5198,7 +5210,9 @@ const ( OpSaturatedSubUint16x16 OpSaturatedSubMaskedUint16x16 OpShiftAllLeftUint16x16 + OpShiftAllLeftMaskedUint16x16 OpShiftAllRightUint16x16 + OpShiftAllRightMaskedUint16x16 OpShiftLeftUint16x16 OpShiftLeftAndFillUpperFromUint16x16 OpShiftLeftAndFillUpperFromMaskedUint16x16 @@ -5207,8 +5221,6 @@ const ( OpShiftRightAndFillUpperFromUint16x16 OpShiftRightAndFillUpperFromMaskedUint16x16 OpShiftRightMaskedUint16x16 - OpShiftRightSignExtendedUint16x16 - OpShiftRightSignExtendedMaskedUint16x16 OpSubUint16x16 OpSubMaskedUint16x16 OpXorUint16x16 @@ -5240,6 +5252,10 @@ const ( OpSaturatedAddMaskedUint16x32 OpSaturatedSubUint16x32 OpSaturatedSubMaskedUint16x32 + OpShiftAllLeftUint16x32 + OpShiftAllLeftMaskedUint16x32 + OpShiftAllRightUint16x32 + OpShiftAllRightMaskedUint16x32 OpShiftLeftUint16x32 OpShiftLeftAndFillUpperFromUint16x32 OpShiftLeftAndFillUpperFromMaskedUint16x32 @@ -5248,8 +5264,6 @@ const ( OpShiftRightAndFillUpperFromUint16x32 OpShiftRightAndFillUpperFromMaskedUint16x32 OpShiftRightMaskedUint16x32 - OpShiftRightSignExtendedUint16x32 - OpShiftRightSignExtendedMaskedUint16x32 OpSubUint16x32 OpSubMaskedUint16x32 OpAddUint16x8 @@ -5286,7 +5300,9 @@ const ( OpSaturatedSubUint16x8 OpSaturatedSubMaskedUint16x8 OpShiftAllLeftUint16x8 + OpShiftAllLeftMaskedUint16x8 OpShiftAllRightUint16x8 + OpShiftAllRightMaskedUint16x8 OpShiftLeftUint16x8 OpShiftLeftAndFillUpperFromUint16x8 OpShiftLeftAndFillUpperFromMaskedUint16x8 @@ -5295,8 +5311,6 @@ const ( OpShiftRightAndFillUpperFromUint16x8 OpShiftRightAndFillUpperFromMaskedUint16x8 OpShiftRightMaskedUint16x8 - OpShiftRightSignExtendedUint16x8 - OpShiftRightSignExtendedMaskedUint16x8 OpSubUint16x8 OpSubMaskedUint16x8 OpXorUint16x8 @@ -5332,6 +5346,10 @@ const ( OpRotateRightMaskedUint32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 + OpShiftAllLeftUint32x16 + OpShiftAllLeftMaskedUint32x16 + OpShiftAllRightUint32x16 + OpShiftAllRightMaskedUint32x16 OpShiftLeftUint32x16 OpShiftLeftAndFillUpperFromUint32x16 OpShiftLeftAndFillUpperFromMaskedUint32x16 @@ -5340,8 +5358,6 @@ const ( OpShiftRightAndFillUpperFromUint32x16 OpShiftRightAndFillUpperFromMaskedUint32x16 OpShiftRightMaskedUint32x16 - OpShiftRightSignExtendedUint32x16 - OpShiftRightSignExtendedMaskedUint32x16 OpSubUint32x16 OpSubMaskedUint32x16 OpUnsignedSignedQuadDotProdAccumulateUint32x16 @@ -5384,7 +5400,9 @@ const ( OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 OpShiftAllLeftUint32x4 + 
OpShiftAllLeftMaskedUint32x4 OpShiftAllRightUint32x4 + OpShiftAllRightMaskedUint32x4 OpShiftLeftUint32x4 OpShiftLeftAndFillUpperFromUint32x4 OpShiftLeftAndFillUpperFromMaskedUint32x4 @@ -5393,8 +5411,6 @@ const ( OpShiftRightAndFillUpperFromUint32x4 OpShiftRightAndFillUpperFromMaskedUint32x4 OpShiftRightMaskedUint32x4 - OpShiftRightSignExtendedUint32x4 - OpShiftRightSignExtendedMaskedUint32x4 OpSubUint32x4 OpSubMaskedUint32x4 OpUnsignedSignedQuadDotProdAccumulateUint32x4 @@ -5437,7 +5453,9 @@ const ( OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 OpShiftAllLeftUint32x8 + OpShiftAllLeftMaskedUint32x8 OpShiftAllRightUint32x8 + OpShiftAllRightMaskedUint32x8 OpShiftLeftUint32x8 OpShiftLeftAndFillUpperFromUint32x8 OpShiftLeftAndFillUpperFromMaskedUint32x8 @@ -5446,8 +5464,6 @@ const ( OpShiftRightAndFillUpperFromUint32x8 OpShiftRightAndFillUpperFromMaskedUint32x8 OpShiftRightMaskedUint32x8 - OpShiftRightSignExtendedUint32x8 - OpShiftRightSignExtendedMaskedUint32x8 OpSubUint32x8 OpSubMaskedUint32x8 OpUnsignedSignedQuadDotProdAccumulateUint32x8 @@ -5498,8 +5514,6 @@ const ( OpShiftRightAndFillUpperFromUint64x2 OpShiftRightAndFillUpperFromMaskedUint64x2 OpShiftRightMaskedUint64x2 - OpShiftRightSignExtendedUint64x2 - OpShiftRightSignExtendedMaskedUint64x2 OpSubUint64x2 OpSubMaskedUint64x2 OpXorUint64x2 @@ -5548,8 +5562,6 @@ const ( OpShiftRightAndFillUpperFromUint64x4 OpShiftRightAndFillUpperFromMaskedUint64x4 OpShiftRightMaskedUint64x4 - OpShiftRightSignExtendedUint64x4 - OpShiftRightSignExtendedMaskedUint64x4 OpSubUint64x4 OpSubMaskedUint64x4 OpXorUint64x4 @@ -5598,8 +5610,6 @@ const ( OpShiftRightAndFillUpperFromUint64x8 OpShiftRightAndFillUpperFromMaskedUint64x8 OpShiftRightMaskedUint64x8 - OpShiftRightSignExtendedUint64x8 - OpShiftRightSignExtendedMaskedUint64x8 OpSubUint64x8 OpSubMaskedUint64x8 OpXorUint64x8 @@ -21491,16 +21501,17 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLW256", - argLen: 2, - asm: x86.AVPSRLW, + name: "VPSLLWMasked256", + argLen: 3, + asm: x86.AVPSLLW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -21518,6 +21529,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRAWMasked256", + argLen: 3, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVW256", argLen: 2, @@ -21581,9 +21607,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVW256", + name: "VPSRAVW256", argLen: 2, - asm: 
x86.AVPSRLVW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -21627,35 +21653,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVWMasked256", - argLen: 3, - asm: x86.AVPSRLVW, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVW256", - argLen: 2, - asm: x86.AVPSRAVW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVWMasked256", argLen: 3, @@ -22012,6 +22009,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLW512", + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSLLWMasked512", + argLen: 3, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAW512", + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAWMasked512", + argLen: 3, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVW512", argLen: 2, @@ -22075,9 +22130,9 @@ var opcodeTable = [...]opInfo{ }, }, { - 
name: "VPSRLVW512", + name: "VPSRAVW512", argLen: 2, - asm: x86.AVPSRLVW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -22121,35 +22176,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVWMasked512", - argLen: 3, - asm: x86.AVPSRLVW, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVW512", - argLen: 2, - asm: x86.AVPSRAVW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVWMasked512", argLen: 3, @@ -22592,16 +22618,17 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLW128", - argLen: 2, - asm: x86.AVPSRLW, + name: "VPSLLWMasked128", + argLen: 3, + asm: x86.AVPSLLW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -22619,6 +22646,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRAWMasked128", + argLen: 3, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVW128", argLen: 2, @@ -22682,9 +22724,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVW128", + name: "VPSRAVW128", argLen: 2, - asm: x86.AVPSRLVW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -22728,35 +22770,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVWMasked128", - argLen: 3, - asm: x86.AVPSRLVW, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVW128", - argLen: 2, - asm: x86.AVPSRAVW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVWMasked128", argLen: 3, @@ -23241,6 +23254,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLD512", + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSLLDMasked512", + argLen: 3, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAD512", + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRADMasked512", + argLen: 3, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVD512", argLen: 2, @@ -23304,9 +23375,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVD512", + name: "VPSRAVD512", argLen: 2, - asm: x86.AVPSRLVD, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -23350,35 +23421,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVDMasked512", - argLen: 3, - asm: x86.AVPSRLVD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVD512", - argLen: 2, - asm: x86.AVPSRAVD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVDMasked512", argLen: 3, @@ -23956,16 +23998,17 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLD128", - argLen: 2, - asm: x86.AVPSRLD, + name: "VPSLLDMasked128", + argLen: 3, + asm: x86.AVPSLLD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -23983,6 +24026,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRADMasked128", + argLen: 3, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVD128", argLen: 2, @@ -24046,9 +24104,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVD128", + name: "VPSRAVD128", argLen: 2, - asm: x86.AVPSRLVD, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24092,35 +24150,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVDMasked128", - argLen: 3, - asm: x86.AVPSRLVD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVD128", - argLen: 2, - asm: x86.AVPSRAVD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, { name: "VPSRAVDMasked128", argLen: 3, @@ -24697,16 +24726,17 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLD256", - 
argLen: 2, - asm: x86.AVPSRLD, + name: "VPSLLDMasked256", + argLen: 3, + asm: x86.AVPSLLD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -24724,6 +24754,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRADMasked256", + argLen: 3, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVD256", argLen: 2, @@ -24787,9 +24832,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVD256", + name: "VPSRAVD256", argLen: 2, - asm: x86.AVPSRLVD, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24833,35 +24878,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVDMasked256", - argLen: 3, - asm: x86.AVPSRLVD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVD256", - argLen: 2, - asm: x86.AVPSRAVD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, { name: "VPSRAVDMasked256", argLen: 3, @@ -25326,35 +25342,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLQ128", - argLen: 2, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRLQMasked128", - argLen: 3, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { 
name: "VPSRAQ128", argLen: 2, @@ -25447,16 +25434,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ128", + name: "VPSRAVQ128", argLen: 2, - asm: x86.AVPSRLVQ, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -25493,35 +25480,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVQMasked128", - argLen: 3, - asm: x86.AVPSRLVQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVQ128", - argLen: 2, - asm: x86.AVPSRAVQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVQMasked128", argLen: 3, @@ -25939,35 +25897,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLQ256", - argLen: 2, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRLQMasked256", - argLen: 3, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAQ256", argLen: 2, @@ -26060,16 +25989,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ256", + name: "VPSRAVQ256", argLen: 2, - asm: x86.AVPSRLVQ, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -26106,35 +26035,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVQMasked256", - argLen: 3, - asm: x86.AVPSRLVQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVQ256", - argLen: 2, - asm: x86.AVPSRAVQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVQMasked256", argLen: 3, @@ -26582,35 +26482,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLQ512", - argLen: 2, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPSRLQMasked512", - argLen: 3, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAQ512", argLen: 2, @@ -26703,9 +26574,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ512", + name: "VPSRAVQ512", argLen: 2, - asm: x86.AVPSRLVQ, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -26749,35 +26620,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVQMasked512", - argLen: 3, - asm: x86.AVPSRLVQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVQ512", - argLen: 2, - asm: x86.AVPSRAVQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 
X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVQMasked512", argLen: 3, @@ -27889,6 +27731,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLW256", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLWMasked256", + argLen: 3, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVW256", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVWMasked256", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPAVGW512", argLen: 2, @@ -28013,6 +27913,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLW512", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLWMasked512", + argLen: 3, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVW512", + argLen: 2, + 
asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVWMasked512", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPAVGW128", argLen: 2, @@ -28137,6 +28095,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLW128", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLWMasked128", + argLen: 3, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVW128", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVWMasked128", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUD512", argLen: 2, @@ -28199,6 +28215,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLD512", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: 
"VPSRLDMasked512", + argLen: 3, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVD512", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVDMasked512", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUD128", argLen: 2, @@ -28276,6 +28350,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLD128", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLDMasked128", + argLen: 3, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVD128", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked128", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUD256", argLen: 2, @@ -28353,6 +28485,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLD256", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLDMasked256", + argLen: 3, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVD256", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked256", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUQ128", argLen: 2, @@ -28431,6 +28621,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLQ128", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked128", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVQ128", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked128", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUQ256", argLen: 2, @@ -28509,6 +28757,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLQ256", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 
X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked256", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVQ256", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked256", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUQ512", argLen: 2, @@ -28602,6 +28908,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLQ512", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLQMasked512", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVQ512", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVQMasked512", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 
X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPAVGB128", argLen: 2, @@ -60515,14 +60879,19 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedInt16x16", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt16x16", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt16x16", - argLen: 2, + name: "ShiftAllRightMaskedInt16x16", + argLen: 3, generic: true, }, { @@ -60565,16 +60934,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt16x16", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt16x16", - argLen: 3, - generic: true, - }, { name: "SignInt16x16", argLen: 2, @@ -60772,6 +61131,26 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftAllLeftInt16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftMaskedInt16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftAllRightInt16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightMaskedInt16x32", + argLen: 3, + generic: true, + }, { name: "ShiftLeftInt16x32", argLen: 2, @@ -60812,16 +61191,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt16x32", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt16x32", - argLen: 3, - generic: true, - }, { name: "SubInt16x32", argLen: 2, @@ -61050,14 +61419,19 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedInt16x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt16x8", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt16x8", - argLen: 2, + name: "ShiftAllRightMaskedInt16x8", + argLen: 3, generic: true, }, { @@ -61100,16 +61474,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt16x8", - argLen: 3, - generic: true, - }, { name: "SignInt16x8", argLen: 2, @@ -61347,6 +61711,26 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "ShiftAllLeftInt32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftMaskedInt32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftAllRightInt32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightMaskedInt32x16", + argLen: 3, + generic: true, + }, { name: "ShiftLeftInt32x16", argLen: 2, @@ -61387,16 +61771,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt32x16", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt32x16", - argLen: 3, - generic: true, - }, { name: "SubInt32x16", argLen: 2, @@ -61666,14 +62040,19 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedInt32x4", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt32x4", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt32x4", - argLen: 2, + name: "ShiftAllRightMaskedInt32x4", + argLen: 3, generic: true, }, { @@ -61716,16 +62095,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt32x4", - argLen: 3, - generic: true, - }, { name: "SignInt32x4", argLen: 2, @@ -62000,14 +62369,19 @@ var opcodeTable = 
[...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedInt32x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt32x8", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt32x8", - argLen: 2, + name: "ShiftAllRightMaskedInt32x8", + argLen: 3, generic: true, }, { @@ -62050,16 +62424,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt32x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt32x8", - argLen: 3, - generic: true, - }, { name: "SignInt32x8", argLen: 2, @@ -62315,16 +62679,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftAllRightSignExtendedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "ShiftAllRightSignExtendedMaskedInt64x2", - argLen: 3, - generic: true, - }, { name: "ShiftLeftInt64x2", argLen: 2, @@ -62365,16 +62719,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt64x2", - argLen: 3, - generic: true, - }, { name: "SubInt64x2", argLen: 2, @@ -62615,16 +62959,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftAllRightSignExtendedInt64x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftAllRightSignExtendedMaskedInt64x4", - argLen: 3, - generic: true, - }, { name: "ShiftLeftInt64x4", argLen: 2, @@ -62665,16 +62999,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt64x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt64x4", - argLen: 3, - generic: true, - }, { name: "SubInt64x4", argLen: 2, @@ -62915,16 +63239,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftAllRightSignExtendedInt64x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftAllRightSignExtendedMaskedInt64x8", - argLen: 3, - generic: true, - }, { name: "ShiftLeftInt64x8", argLen: 2, @@ -62965,16 +63279,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt64x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt64x8", - argLen: 3, - generic: true, - }, { name: "SubInt64x8", argLen: 2, @@ -63697,11 +64001,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint16x16", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint16x16", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint16x16", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint16x16", argLen: 2, @@ -63742,16 +64056,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint16x16", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint16x16", - argLen: 3, - generic: true, - }, { name: "SubUint16x16", argLen: 2, @@ -63924,6 +64228,26 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftAllLeftUint16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftMaskedUint16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftAllRightUint16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightMaskedUint16x32", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint16x32", argLen: 2, @@ -63964,16 +64288,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint16x32", - argLen: 2, - generic: 
true, - }, - { - name: "ShiftRightSignExtendedMaskedUint16x32", - argLen: 3, - generic: true, - }, { name: "SubUint16x32", argLen: 2, @@ -64172,11 +64486,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint16x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint16x8", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint16x8", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint16x8", argLen: 2, @@ -64217,16 +64541,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint16x8", - argLen: 3, - generic: true, - }, { name: "SubUint16x8", argLen: 2, @@ -64417,6 +64731,26 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "ShiftAllLeftUint32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftMaskedUint32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftAllRightUint32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightMaskedUint32x16", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint32x16", argLen: 2, @@ -64457,16 +64791,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint32x16", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint32x16", - argLen: 3, - generic: true, - }, { name: "SubUint32x16", argLen: 2, @@ -64694,11 +65018,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint32x4", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint32x4", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint32x4", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint32x4", argLen: 2, @@ -64739,16 +65073,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint32x4", - argLen: 3, - generic: true, - }, { name: "SubUint32x4", argLen: 2, @@ -64976,11 +65300,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint32x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint32x8", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint32x8", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint32x8", argLen: 2, @@ -65021,16 +65355,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint32x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint32x8", - argLen: 3, - generic: true, - }, { name: "SubUint32x8", argLen: 2, @@ -65299,16 +65623,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint64x2", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint64x2", - argLen: 3, - generic: true, - }, { name: "SubUint64x2", argLen: 2, @@ -65567,16 +65881,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint64x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint64x4", - argLen: 3, - generic: true, - }, { name: "SubUint64x4", argLen: 2, @@ -65835,16 +66139,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint64x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint64x8", - argLen: 3, - 
generic: true, - }, { name: "SubUint64x8", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index d258b3bd0e..d78c9212cb 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -4131,9 +4131,15 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftInt16x16: v.Op = OpAMD64VPSLLW256 return true + case OpShiftAllLeftInt16x32: + v.Op = OpAMD64VPSLLW512 + return true case OpShiftAllLeftInt16x8: v.Op = OpAMD64VPSLLW128 return true + case OpShiftAllLeftInt32x16: + v.Op = OpAMD64VPSLLD512 + return true case OpShiftAllLeftInt32x4: v.Op = OpAMD64VPSLLD128 return true @@ -4149,12 +4155,36 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftInt64x8: v.Op = OpAMD64VPSLLQ512 return true + case OpShiftAllLeftMaskedInt16x16: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v) + case OpShiftAllLeftMaskedInt16x32: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v) + case OpShiftAllLeftMaskedInt16x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v) + case OpShiftAllLeftMaskedInt32x16: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v) + case OpShiftAllLeftMaskedInt32x4: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v) + case OpShiftAllLeftMaskedInt32x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v) case OpShiftAllLeftMaskedInt64x2: return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v) case OpShiftAllLeftMaskedInt64x4: return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v) case OpShiftAllLeftMaskedInt64x8: return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v) + case OpShiftAllLeftMaskedUint16x16: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v) + case OpShiftAllLeftMaskedUint16x32: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v) + case OpShiftAllLeftMaskedUint16x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v) + case OpShiftAllLeftMaskedUint32x16: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v) + case OpShiftAllLeftMaskedUint32x4: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v) + case OpShiftAllLeftMaskedUint32x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v) case OpShiftAllLeftMaskedUint64x2: return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v) case OpShiftAllLeftMaskedUint64x4: @@ -4164,9 +4194,15 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftUint16x16: v.Op = OpAMD64VPSLLW256 return true + case OpShiftAllLeftUint16x32: + v.Op = OpAMD64VPSLLW512 + return true case OpShiftAllLeftUint16x8: v.Op = OpAMD64VPSLLW128 return true + case OpShiftAllLeftUint32x16: + v.Op = OpAMD64VPSLLD512 + return true case OpShiftAllLeftUint32x4: v.Op = OpAMD64VPSLLD128 return true @@ -4273,71 +4309,80 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPSHRDQ512 return true case OpShiftAllRightInt16x16: - v.Op = OpAMD64VPSRLW256 + v.Op = OpAMD64VPSRAW256 + return true + case OpShiftAllRightInt16x32: + v.Op = OpAMD64VPSRAW512 return true case OpShiftAllRightInt16x8: - v.Op = OpAMD64VPSRLW128 + v.Op = OpAMD64VPSRAW128 + return true + case OpShiftAllRightInt32x16: + v.Op = OpAMD64VPSRAD512 return true case OpShiftAllRightInt32x4: - v.Op = OpAMD64VPSRLD128 + v.Op = OpAMD64VPSRAD128 return true case OpShiftAllRightInt32x8: - v.Op = OpAMD64VPSRLD256 + v.Op = OpAMD64VPSRAD256 return true case OpShiftAllRightInt64x2: - v.Op = OpAMD64VPSRLQ128 + v.Op = OpAMD64VPSRAQ128 return true case OpShiftAllRightInt64x4: - v.Op = OpAMD64VPSRLQ256 + v.Op = 
OpAMD64VPSRAQ256 return true case OpShiftAllRightInt64x8: - v.Op = OpAMD64VPSRLQ512 + v.Op = OpAMD64VPSRAQ512 return true + case OpShiftAllRightMaskedInt16x16: + return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v) + case OpShiftAllRightMaskedInt16x32: + return rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v) + case OpShiftAllRightMaskedInt16x8: + return rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v) + case OpShiftAllRightMaskedInt32x16: + return rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v) + case OpShiftAllRightMaskedInt32x4: + return rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v) + case OpShiftAllRightMaskedInt32x8: + return rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v) case OpShiftAllRightMaskedInt64x2: return rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v) case OpShiftAllRightMaskedInt64x4: return rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v) case OpShiftAllRightMaskedInt64x8: return rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v) + case OpShiftAllRightMaskedUint16x16: + return rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v) + case OpShiftAllRightMaskedUint16x32: + return rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v) + case OpShiftAllRightMaskedUint16x8: + return rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v) + case OpShiftAllRightMaskedUint32x16: + return rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v) + case OpShiftAllRightMaskedUint32x4: + return rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v) + case OpShiftAllRightMaskedUint32x8: + return rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v) case OpShiftAllRightMaskedUint64x2: return rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v) case OpShiftAllRightMaskedUint64x4: return rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v) case OpShiftAllRightMaskedUint64x8: return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v) - case OpShiftAllRightSignExtendedInt16x16: - v.Op = OpAMD64VPSRAW256 - return true - case OpShiftAllRightSignExtendedInt16x8: - v.Op = OpAMD64VPSRAW128 - return true - case OpShiftAllRightSignExtendedInt32x4: - v.Op = OpAMD64VPSRAD128 - return true - case OpShiftAllRightSignExtendedInt32x8: - v.Op = OpAMD64VPSRAD256 - return true - case OpShiftAllRightSignExtendedInt64x2: - v.Op = OpAMD64VPSRAQ128 - return true - case OpShiftAllRightSignExtendedInt64x4: - v.Op = OpAMD64VPSRAQ256 - return true - case OpShiftAllRightSignExtendedInt64x8: - v.Op = OpAMD64VPSRAQ512 - return true - case OpShiftAllRightSignExtendedMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x2(v) - case OpShiftAllRightSignExtendedMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x4(v) - case OpShiftAllRightSignExtendedMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x8(v) case OpShiftAllRightUint16x16: v.Op = OpAMD64VPSRLW256 return true + case OpShiftAllRightUint16x32: + v.Op = OpAMD64VPSRLW512 + return true case OpShiftAllRightUint16x8: v.Op = OpAMD64VPSRLW128 return true + case OpShiftAllRightUint32x16: + v.Op = OpAMD64VPSRLD512 + return true case OpShiftAllRightUint32x4: v.Op = OpAMD64VPSRLD128 return true @@ -4624,31 +4669,31 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPSHRDVQ512 return true case OpShiftRightInt16x16: - v.Op = OpAMD64VPSRLVW256 + v.Op = OpAMD64VPSRAVW256 return true case OpShiftRightInt16x32: - v.Op = OpAMD64VPSRLVW512 + v.Op = OpAMD64VPSRAVW512 return true case OpShiftRightInt16x8: - v.Op = OpAMD64VPSRLVW128 + v.Op = OpAMD64VPSRAVW128 return true case OpShiftRightInt32x16: - 
v.Op = OpAMD64VPSRLVD512 + v.Op = OpAMD64VPSRAVD512 return true case OpShiftRightInt32x4: - v.Op = OpAMD64VPSRLVD128 + v.Op = OpAMD64VPSRAVD128 return true case OpShiftRightInt32x8: - v.Op = OpAMD64VPSRLVD256 + v.Op = OpAMD64VPSRAVD256 return true case OpShiftRightInt64x2: - v.Op = OpAMD64VPSRLVQ128 + v.Op = OpAMD64VPSRAVQ128 return true case OpShiftRightInt64x4: - v.Op = OpAMD64VPSRLVQ256 + v.Op = OpAMD64VPSRAVQ256 return true case OpShiftRightInt64x8: - v.Op = OpAMD64VPSRLVQ512 + v.Op = OpAMD64VPSRAVQ512 return true case OpShiftRightMaskedInt16x16: return rewriteValueAMD64_OpShiftRightMaskedInt16x16(v) @@ -4686,96 +4731,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpShiftRightMaskedUint64x4(v) case OpShiftRightMaskedUint64x8: return rewriteValueAMD64_OpShiftRightMaskedUint64x8(v) - case OpShiftRightSignExtendedInt16x16: - v.Op = OpAMD64VPSRAVW256 - return true - case OpShiftRightSignExtendedInt16x32: - v.Op = OpAMD64VPSRAVW512 - return true - case OpShiftRightSignExtendedInt16x8: - v.Op = OpAMD64VPSRAVW128 - return true - case OpShiftRightSignExtendedInt32x16: - v.Op = OpAMD64VPSRAVD512 - return true - case OpShiftRightSignExtendedInt32x4: - v.Op = OpAMD64VPSRAVD128 - return true - case OpShiftRightSignExtendedInt32x8: - v.Op = OpAMD64VPSRAVD256 - return true - case OpShiftRightSignExtendedInt64x2: - v.Op = OpAMD64VPSRAVQ128 - return true - case OpShiftRightSignExtendedInt64x4: - v.Op = OpAMD64VPSRAVQ256 - return true - case OpShiftRightSignExtendedInt64x8: - v.Op = OpAMD64VPSRAVQ512 - return true - case OpShiftRightSignExtendedMaskedInt16x16: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x16(v) - case OpShiftRightSignExtendedMaskedInt16x32: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x32(v) - case OpShiftRightSignExtendedMaskedInt16x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x8(v) - case OpShiftRightSignExtendedMaskedInt32x16: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x16(v) - case OpShiftRightSignExtendedMaskedInt32x4: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x4(v) - case OpShiftRightSignExtendedMaskedInt32x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x8(v) - case OpShiftRightSignExtendedMaskedInt64x2: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x2(v) - case OpShiftRightSignExtendedMaskedInt64x4: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x4(v) - case OpShiftRightSignExtendedMaskedInt64x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x8(v) - case OpShiftRightSignExtendedMaskedUint16x16: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x16(v) - case OpShiftRightSignExtendedMaskedUint16x32: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x32(v) - case OpShiftRightSignExtendedMaskedUint16x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x8(v) - case OpShiftRightSignExtendedMaskedUint32x16: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x16(v) - case OpShiftRightSignExtendedMaskedUint32x4: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x4(v) - case OpShiftRightSignExtendedMaskedUint32x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x8(v) - case OpShiftRightSignExtendedMaskedUint64x2: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x2(v) - case OpShiftRightSignExtendedMaskedUint64x4: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x4(v) - case 
OpShiftRightSignExtendedMaskedUint64x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x8(v) - case OpShiftRightSignExtendedUint16x16: - v.Op = OpAMD64VPSRAVW256 - return true - case OpShiftRightSignExtendedUint16x32: - v.Op = OpAMD64VPSRAVW512 - return true - case OpShiftRightSignExtendedUint16x8: - v.Op = OpAMD64VPSRAVW128 - return true - case OpShiftRightSignExtendedUint32x16: - v.Op = OpAMD64VPSRAVD512 - return true - case OpShiftRightSignExtendedUint32x4: - v.Op = OpAMD64VPSRAVD128 - return true - case OpShiftRightSignExtendedUint32x8: - v.Op = OpAMD64VPSRAVD256 - return true - case OpShiftRightSignExtendedUint64x2: - v.Op = OpAMD64VPSRAVQ128 - return true - case OpShiftRightSignExtendedUint64x4: - v.Op = OpAMD64VPSRAVQ256 - return true - case OpShiftRightSignExtendedUint64x8: - v.Op = OpAMD64VPSRAVQ512 - return true case OpShiftRightUint16x16: v.Op = OpAMD64VPSRLVW256 return true @@ -48631,6 +48586,114 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x8(v *Value) bo return true } } +func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt16x16 x y mask) + // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt16x32 x y mask) + // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt16x8 x y mask) + // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt32x16 x y mask) + // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt32x4 x y mask) + // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt32x8 x y mask) + // result: (VPSLLDMasked256 x y 
(VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -48685,6 +48748,114 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool { return true } } +func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint16x16 x y mask) + // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint16x32 x y mask) + // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint16x8 x y mask) + // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint32x16 x y mask) + // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint32x4 x y mask) + // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint32x8 x y mask) + // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -49099,18 +49270,126 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x8(v *Value) b return true } } +func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := 
v.Block + // match: (ShiftAllRightMaskedInt16x16 x y mask) + // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt16x32 x y mask) + // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt16x8 x y mask) + // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt32x16 x y mask) + // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRADMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt32x4 x y mask) + // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRADMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt32x8 x y mask) + // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRADMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block // match: (ShiftAllRightMaskedInt64x2 x y mask) - // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked128) + v.reset(OpAMD64VPSRAQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -49123,12 +49402,12 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftAllRightMaskedInt64x4 x y mask) - // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked256) + v.reset(OpAMD64VPSRAQMasked256) v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -49141,120 +49420,174 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftAllRightMaskedInt64x8 x y mask) - // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked512) + v.reset(OpAMD64VPSRAQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightMaskedUint64x2 x y mask) - // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (ShiftAllRightMaskedUint16x16 x y mask) + // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPSRLWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightMaskedUint64x4 x y mask) - // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (ShiftAllRightMaskedUint16x32 x y mask) + // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSRLWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightMaskedUint64x8 x y mask) - // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (ShiftAllRightMaskedUint16x8 x y mask) + // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPSRLWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightSignExtendedMaskedInt64x2 x y mask) - // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (ShiftAllRightMaskedUint32x16 x y mask) + // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked128) + v.reset(OpAMD64VPSRLDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := 
v.Block + // match: (ShiftAllRightMaskedUint32x4 x y mask) + // result: (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedUint32x8 x y mask) + // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedUint64x2 x y mask) + // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightSignExtendedMaskedInt64x4 x y mask) - // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (ShiftAllRightMaskedUint64x4 x y mask) + // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked256) + v.reset(OpAMD64VPSRLQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightSignExtendedMaskedInt64x8 x y mask) - // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (ShiftAllRightMaskedUint64x8 x y mask) + // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked512) + v.reset(OpAMD64VPSRLQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50311,12 +50644,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt16x16(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt16x16 x y mask) - // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) + // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked256) + v.reset(OpAMD64VPSRAVWMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50329,12 +50662,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt16x32(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt16x32 x y mask) - // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) + // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked512) + v.reset(OpAMD64VPSRAVWMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50347,12 
+50680,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt16x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt16x8 x y mask) - // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) + // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked128) + v.reset(OpAMD64VPSRAVWMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50365,12 +50698,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt32x16(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt32x16 x y mask) - // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) + // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512) + v.reset(OpAMD64VPSRAVDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50383,12 +50716,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt32x4(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt32x4 x y mask) - // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) + // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked128) + v.reset(OpAMD64VPSRAVDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50401,12 +50734,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt32x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt32x8 x y mask) - // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) + // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked256) + v.reset(OpAMD64VPSRAVDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50419,12 +50752,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt64x2(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt64x2 x y mask) - // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) + // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked128) + v.reset(OpAMD64VPSRAVQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50437,12 +50770,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt64x4(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt64x4 x y mask) - // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) + // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked256) + v.reset(OpAMD64VPSRAVQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50455,12 +50788,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt64x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt64x8 x y mask) - // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) + // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512) + v.reset(OpAMD64VPSRAVQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50629,330 +50962,6 @@ func rewriteValueAMD64_OpShiftRightMaskedUint64x8(v *Value) bool { return true } } -func 
rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt16x16 x y mask) - // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt16x32 x y mask) - // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt16x8 x y mask) - // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt32x16 x y mask) - // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt32x4 x y mask) - // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt32x8 x y mask) - // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt64x2 x y mask) - // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt64x4 x y mask) - // 
result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt64x8 x y mask) - // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint16x16 x y mask) - // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint16x32 x y mask) - // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint16x8 x y mask) - // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint32x16 x y mask) - // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint32x4 x y mask) - // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint32x8 x y mask) - // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint64x2 x y mask) - // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint64x4 x y mask) - // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint64x8 x y mask) - // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpSlicemask(v *Value) bool { v_0 := v.Args[0] b := v.Block diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index ffd341d6ab..085c0b8d99 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -1250,15 +1250,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
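// NOTE (illustrative aside, not part of the generated patch): the rewrite
// rules above pick the instruction by element signedness, mirroring Go's
// scalar shift semantics — signed lanes lower to the arithmetic VPSRA*Masked
// forms, unsigned lanes to the logical VPSRL*Masked forms. A minimal scalar
// sketch of that convention:
//
//	var s int16 = -8      // arithmetic: s >> 2 == -2     (sign bit replicated)
//	var u uint16 = 0xFFF8 // logical:    u >> 2 == 0x3FFE (zero-filled)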
addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64) @@ -1298,23 +1302,39 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, 
"Int64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x8, types.TypeVec512), sys.AMD64) @@ -1354,22 +1374,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, 
"Uint64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64) @@ -1514,42 +1536,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, 
"Int64x2.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index e98aca1abf..38ccfaac8c 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -6883,6 +6883,11 @@ func (x Int16x8) ShiftAllLeft(y uint64) Int16x8 // Asm: VPSLLW, CPU Feature: AVX2 func (x Int16x16) ShiftAllLeft(y uint64) Int16x16 +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllLeft(y uint64) Int16x32 + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX @@ -6893,6 +6898,11 @@ func (x Int32x4) ShiftAllLeft(y uint64) Int32x4 // Asm: VPSLLD, CPU Feature: AVX2 func (x Int32x8) ShiftAllLeft(y uint64) Int32x8 +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllLeft(y uint64) Int32x16 + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX @@ -6918,6 +6928,11 @@ func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8 // Asm: VPSLLW, CPU Feature: AVX2 func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16 +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
+// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32 + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX @@ -6928,6 +6943,11 @@ func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4 // Asm: VPSLLD, CPU Feature: AVX2 func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8 +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16 + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX @@ -7237,6 +7257,36 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, z /* ShiftAllLeftMasked */ +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Int16x8 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Int16x16 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Int16x32 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Int32x4 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Int32x8 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Int32x16 + // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512EVEX @@ -7252,6 +7302,36 @@ func (x Int64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Int64x4 // Asm: VPSLLQ, CPU Feature: AVX512EVEX func (x Int64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Int64x8 +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Uint16x8 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Uint16x16 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Uint16x32 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
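+// For example, a lane holding 0x00FF shifted left by 8 becomes 0xFF00.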
+// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Uint32x4 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Uint32x8 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Uint32x16 + // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512EVEX @@ -7269,39 +7349,49 @@ func (x Uint64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Uint64x8 /* ShiftAllRight */ -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLW, CPU Feature: AVX +// Asm: VPSRAW, CPU Feature: AVX func (x Int16x8) ShiftAllRight(y uint64) Int16x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLW, CPU Feature: AVX2 +// Asm: VPSRAW, CPU Feature: AVX2 func (x Int16x16) ShiftAllRight(y uint64) Int16x16 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLD, CPU Feature: AVX +// Asm: VPSRAW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllRight(y uint64) Int16x32 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX func (x Int32x4) ShiftAllRight(y uint64) Int32x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLD, CPU Feature: AVX2 +// Asm: VPSRAD, CPU Feature: AVX2 func (x Int32x8) ShiftAllRight(y uint64) Int32x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX +// Asm: VPSRAD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllRight(y uint64) Int32x16 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX func (x Int64x2) ShiftAllRight(y uint64) Int64x2 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
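+// For example, a lane holding -8 shifted right by 2 yields -2.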
// -// Asm: VPSRLQ, CPU Feature: AVX2 +// Asm: VPSRAQ, CPU Feature: AVX512EVEX func (x Int64x4) ShiftAllRight(y uint64) Int64x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX +// Asm: VPSRAQ, CPU Feature: AVX512EVEX func (x Int64x8) ShiftAllRight(y uint64) Int64x8 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. @@ -7314,6 +7404,11 @@ func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8 // Asm: VPSRLW, CPU Feature: AVX2 func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16 +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32 + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLD, CPU Feature: AVX @@ -7324,6 +7419,11 @@ func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4 // Asm: VPSRLD, CPU Feature: AVX2 func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16 + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLQ, CPU Feature: AVX @@ -7633,89 +7733,95 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, z /* ShiftAllRightMasked */ -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2 +// Asm: VPSRAW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Int16x8 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4 +// Asm: VPSRAW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Int16x16 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8 +// Asm: VPSRAW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Int16x32 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
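+// For example, a lane holding -64 shifted right by 3 yields -8.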
// -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2 +// Asm: VPSRAD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Int32x4 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4 +// Asm: VPSRAD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Int32x8 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8 +// Asm: VPSRAD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Int32x16 -/* ShiftAllRightSignExtended */ +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRAW, CPU Feature: AVX -func (x Int16x8) ShiftAllRightSignExtended(y uint64) Int16x8 +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRAW, CPU Feature: AVX2 -func (x Int16x16) ShiftAllRightSignExtended(y uint64) Int16x16 +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAD, CPU Feature: AVX -func (x Int32x4) ShiftAllRightSignExtended(y uint64) Int32x4 +// Asm: VPSRLW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Uint16x8 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAD, CPU Feature: AVX2 -func (x Int32x8) ShiftAllRightSignExtended(y uint64) Int32x8 +// Asm: VPSRLW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Uint16x16 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. 
Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x2) ShiftAllRightSignExtended(y uint64) Int64x2 +// Asm: VPSRLW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Uint16x32 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x4) ShiftAllRightSignExtended(y uint64) Int64x4 +// Asm: VPSRLD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Uint32x4 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x8) ShiftAllRightSignExtended(y uint64) Int64x8 +// Asm: VPSRLD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Uint32x8 -/* ShiftAllRightSignExtendedMasked */ +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Uint32x16 -// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x2) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x2) Int64x2 +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2 -// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x4) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x4) Int64x4 +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4 -// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x8) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x8) Int64x8 +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8 /* ShiftLeft */ @@ -8123,49 +8229,49 @@ func (x Uint64x8) ShiftLeftMasked(y Uint64x8, z Mask64x8) Uint64x8 /* ShiftRight */ -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVW, CPU Feature: AVX512EVEX +// Asm: VPSRAVW, CPU Feature: AVX512EVEX func (x Int16x8) ShiftRight(y Int16x8) Int16x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVW, CPU Feature: AVX512EVEX +// Asm: VPSRAVW, CPU Feature: AVX512EVEX func (x Int16x16) ShiftRight(y Int16x16) Int16x16 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVW, CPU Feature: AVX512EVEX +// Asm: VPSRAVW, CPU Feature: AVX512EVEX func (x Int16x32) ShiftRight(y Int16x32) Int16x32 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVD, CPU Feature: AVX2 +// Asm: VPSRAVD, CPU Feature: AVX2 func (x Int32x4) ShiftRight(y Int32x4) Int32x4 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVD, CPU Feature: AVX2 +// Asm: VPSRAVD, CPU Feature: AVX2 func (x Int32x8) ShiftRight(y Int32x8) Int32x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVD, CPU Feature: AVX512EVEX +// Asm: VPSRAVD, CPU Feature: AVX512EVEX func (x Int32x16) ShiftRight(y Int32x16) Int32x16 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVQ, CPU Feature: AVX2 +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX func (x Int64x2) ShiftRight(y Int64x2) Int64x2 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVQ, CPU Feature: AVX2 +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX func (x Int64x4) ShiftRight(y Int64x4) Int64x4 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x8) ShiftRight(y Int64x8) Int64x8
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
@@ -8435,49 +8541,49 @@ func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mas
 
 /* ShiftRightMasked */
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x8) ShiftRightMasked(y Int16x8, z Mask16x8) Int16x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x16) ShiftRightMasked(y Int16x16, z Mask16x16) Int16x16
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x32) ShiftRightMasked(y Int16x32, z Mask16x32) Int16x32
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x4) ShiftRightMasked(y Int32x4, z Mask32x4) Int32x4
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x8) ShiftRightMasked(y Int32x8, z Mask32x8) Int32x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x16) ShiftRightMasked(y Int32x16, z Mask32x16) Int32x16
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x2) ShiftRightMasked(y Int64x2, z Mask64x2) Int64x2
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x4) ShiftRightMasked(y Int64x4, z Mask64x4) Int64x4
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x8) ShiftRightMasked(y Int64x8, z Mask64x8) Int64x8
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
@@ -8525,190 +8631,6 @@ func (x Uint64x4) ShiftRightMasked(y Uint64x4, z Mask64x4) Uint64x4
 // Asm: VPSRLVQ, CPU Feature: AVX512EVEX
 func (x Uint64x8) ShiftRightMasked(y Uint64x8, z Mask64x8) Uint64x8
 
-/* ShiftRightSignExtended */
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x8) ShiftRightSignExtended(y Int16x8) Int16x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x16) ShiftRightSignExtended(y Int16x16) Int16x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x32) ShiftRightSignExtended(y Int16x32) Int16x32
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Int32x4) ShiftRightSignExtended(y Int32x4) Int32x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Int32x8) ShiftRightSignExtended(y Int32x8) Int32x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x16) ShiftRightSignExtended(y Int32x16) Int32x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x2) ShiftRightSignExtended(y Int64x2) Int64x2
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x4) ShiftRightSignExtended(y Int64x4) Int64x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftRightSignExtended(y Int64x8) Int64x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x8) ShiftRightSignExtended(y Uint16x8) Uint16x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x16) ShiftRightSignExtended(y Uint16x16) Uint16x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x32) ShiftRightSignExtended(y Uint16x32) Uint16x32
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Uint32x4) ShiftRightSignExtended(y Uint32x4) Uint32x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Uint32x8) ShiftRightSignExtended(y Uint32x8) Uint32x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x16) ShiftRightSignExtended(y Uint32x16) Uint32x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) ShiftRightSignExtended(y Uint64x2) Uint64x2
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) ShiftRightSignExtended(y Uint64x4) Uint64x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) ShiftRightSignExtended(y Uint64x8) Uint64x8
-
-/* ShiftRightSignExtendedMasked */
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x8) ShiftRightSignExtendedMasked(y Int16x8, z Mask16x8) Int16x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x16) ShiftRightSignExtendedMasked(y Int16x16, z Mask16x16) Int16x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x32) ShiftRightSignExtendedMasked(y Int16x32, z Mask16x32) Int16x32
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x4) ShiftRightSignExtendedMasked(y Int32x4, z Mask32x4) Int32x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x8) ShiftRightSignExtendedMasked(y Int32x8, z Mask32x8) Int32x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x16) ShiftRightSignExtendedMasked(y Int32x16, z Mask32x16) Int32x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x2) ShiftRightSignExtendedMasked(y Int64x2, z Mask64x2) Int64x2
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x4) ShiftRightSignExtendedMasked(y Int64x4, z Mask64x4) Int64x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftRightSignExtendedMasked(y Int64x8, z Mask64x8) Int64x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x8) ShiftRightSignExtendedMasked(y Uint16x8, z Mask16x8) Uint16x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x16) ShiftRightSignExtendedMasked(y Uint16x16, z Mask16x16) Uint16x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x32) ShiftRightSignExtendedMasked(y Uint16x32, z Mask16x32) Uint16x32
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x4) ShiftRightSignExtendedMasked(y Uint32x4, z Mask32x4) Uint32x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x8) ShiftRightSignExtendedMasked(y Uint32x8, z Mask32x8) Uint32x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x16) ShiftRightSignExtendedMasked(y Uint32x16, z Mask32x16) Uint32x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) ShiftRightSignExtendedMasked(y Uint64x2, z Mask64x2) Uint64x2
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) ShiftRightSignExtendedMasked(y Uint64x4, z Mask64x4) Uint64x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) ShiftRightSignExtendedMasked(y Uint64x8, z Mask64x8) Uint64x8
-
 /* Sign */
 
 // Sign returns the product of the first operand with -1, 0, or 1,
diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go
index 62096a76cf..15e5c45097 100644
--- a/src/simd/simd_wrapped_test.go
+++ b/src/simd/simd_wrapped_test.go
@@ -2055,8 +2055,6 @@ func testInt16x8Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sign":
 			gotv = vec0.Sign(vec1)
 		case "Sub":
@@ -2101,8 +2099,6 @@ func testInt16x8BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x8())
 
@@ -2356,8 +2352,6 @@ func testInt16x16Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sign":
 			gotv = vec0.Sign(vec1)
 		case "Sub":
@@ -2402,8 +2396,6 @@ func testInt16x16BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16,
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x16())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x16())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x16())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x16())
 
@@ -2643,8 +2635,6 @@ func testInt16x32Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 
@@ -2685,8 +2675,6 @@ func testInt16x32BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16,
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x32())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x32())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x32())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x32())
 
@@ -2934,8 +2922,6 @@ func testInt32x4Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sign":
 			gotv = vec0.Sign(vec1)
 		case "Sub":
@@ -2984,8 +2970,6 @@ func testInt32x4BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x4())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x4())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x4())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x4())
 		case "XorMasked":
@@ -3311,8 +3295,6 @@ func testInt32x8Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sign":
 			gotv = vec0.Sign(vec1)
 		case "Sub":
@@ -3361,8 +3343,6 @@ func testInt32x8BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x8())
 		case "XorMasked":
@@ -3684,8 +3664,6 @@ func testInt32x16Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, whic
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -3732,8 +3710,6 @@ func testInt32x16BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x16())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x16())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x16())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x16())
 		case "XorMasked":
@@ -4036,8 +4012,6 @@ func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -4086,8 +4060,6 @@ func testInt64x2BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x2())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x2())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x2())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x2())
 		case "XorMasked":
@@ -4292,8 +4264,6 @@ func testInt64x4Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -4342,8 +4312,6 @@ func testInt64x4BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x4())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x4())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x4())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x4())
 		case "XorMasked":
@@ -4548,8 +4516,6 @@ func testInt64x8Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -4598,8 +4564,6 @@ func testInt64x8BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x8())
 		case "XorMasked":
@@ -5478,8 +5442,6 @@ func testUint16x8Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -5522,8 +5484,6 @@ func testUint16x8BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x8())
 
@@ -5726,8 +5686,6 @@ func testUint16x16Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16,
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -5770,8 +5728,6 @@ func testUint16x16BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x16())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x16())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x16())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x16())
 
@@ -5964,8 +5920,6 @@ func testUint16x32Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16,
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 
@@ -6006,8 +5960,6 @@ func testUint16x32BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x32())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x32())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x32())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x32())
 
@@ -6206,8 +6158,6 @@ func testUint32x4Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -6252,8 +6202,6 @@ func testUint32x4BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x4())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x4())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x4())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x4())
 		case "XorMasked":
@@ -6524,8 +6472,6 @@ func testUint32x8Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -6570,8 +6516,6 @@ func testUint32x8BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x8())
 		case "XorMasked":
@@ -6838,8 +6782,6 @@ func testUint32x16Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32,
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -6884,8 +6826,6 @@ func testUint32x16BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x16())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x16())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x16())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x16())
 		case "XorMasked":
@@ -7133,8 +7073,6 @@ func testUint64x2Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -7181,8 +7119,6 @@ func testUint64x2BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x2())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x2())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x2())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x2())
 		case "XorMasked":
@@ -7381,8 +7317,6 @@ func testUint64x4Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -7429,8 +7363,6 @@ func testUint64x4BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x4())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x4())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x4())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x4())
 		case "XorMasked":
@@ -7629,8 +7561,6 @@ func testUint64x8Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -7677,8 +7607,6 @@ func testUint64x8BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x8())
 		case "XorMasked":
@@ -7884,7 +7812,5 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
 // ShiftAllRightAndFillUpperFrom
 // ShiftAllRightAndFillUpperFromMasked
 // ShiftAllRightMasked
-// ShiftAllRightSignExtended
-// ShiftAllRightSignExtendedMasked
 // TruncWithPrecision
 // TruncWithPrecisionMasked
-- 
2.52.0