From b69622b83e38b58a461938163fdef03683a2a871 Mon Sep 17 00:00:00 2001
From: Junyang Shao
Date: Fri, 11 Jul 2025 17:56:22 +0000
Subject: [PATCH] [dev.simd] cmd/compile, simd: adjust Shift.* operations

This CL does the following:

1. Removes the ShiftRightSignExtended operations; signed vectors now
   default to arithmetic shifts and unsigned vectors to logical shifts.
2. Adds the missing Shift.* operations that were left out due to a YAML
   error in the generator.

(See the illustrative sketch after the diff for the resulting
signed/unsigned shift semantics.)

This CL is generated by CL 687595.

Change-Id: I663115498adb91c82e89a8476e6748794e997cfa
Reviewed-on: https://go-review.googlesource.com/c/go/+/687596
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
Reviewed-by: Cherry Mui
---
 src/cmd/compile/internal/amd64/simdssa.go     |  128 +-
 .../compile/internal/ssa/_gen/simdAMD64.rules |  134 +-
 .../compile/internal/ssa/_gen/simdAMD64ops.go |   98 +-
 .../internal/ssa/_gen/simdgenericOps.go       |   78 +-
 src/cmd/compile/internal/ssa/opGen.go         | 1688 ++++++++++-------
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 1025 +++++-----
 .../compile/internal/ssagen/simdintrinsics.go |   78 +-
 src/simd/ops_amd64.go                         |  490 ++---
 src/simd/simd_wrapped_test.go                 |   74 -
 9 files changed, 1984 insertions(+), 1809 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 12a8c857bd..e2d0dd17c6 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -273,15 +273,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSLLVQ128, ssa.OpAMD64VPSLLVQ256, ssa.OpAMD64VPSLLVQ512, - ssa.OpAMD64VPSRLVW128, - ssa.OpAMD64VPSRLVW256, - ssa.OpAMD64VPSRLVW512, - ssa.OpAMD64VPSRLVD128, - ssa.OpAMD64VPSRLVD256, - ssa.OpAMD64VPSRLVD512, - ssa.OpAMD64VPSRLVQ128, - ssa.OpAMD64VPSRLVQ256, - ssa.OpAMD64VPSRLVQ512, ssa.OpAMD64VPSRAVW128, ssa.OpAMD64VPSRAVW256, ssa.OpAMD64VPSRAVW512,
@@ -291,6 +282,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSRAVD128, ssa.OpAMD64VPSRAVD256, ssa.OpAMD64VPSRAVD512, ssa.OpAMD64VPSRAVQ128, ssa.OpAMD64VPSRAVQ256, ssa.OpAMD64VPSRAVQ512, + ssa.OpAMD64VPSRLVW128, + ssa.OpAMD64VPSRLVW256, + ssa.OpAMD64VPSRLVW512, + ssa.OpAMD64VPSRLVD128, + ssa.OpAMD64VPSRLVD256, + ssa.OpAMD64VPSRLVD512, + ssa.OpAMD64VPSRLVQ128, + ssa.OpAMD64VPSRLVQ256, + ssa.OpAMD64VPSRLVQ512, ssa.OpAMD64VPSIGNB128, ssa.OpAMD64VPSIGNB256, ssa.OpAMD64VPSIGNW128, ssa.OpAMD64VPSIGNW256,
@@ -504,15 +504,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSLLVQMasked128, ssa.OpAMD64VPSLLVQMasked256, ssa.OpAMD64VPSLLVQMasked512, - ssa.OpAMD64VPSRLVWMasked128, - ssa.OpAMD64VPSRLVWMasked256, - ssa.OpAMD64VPSRLVWMasked512, - ssa.OpAMD64VPSRLVDMasked128, - ssa.OpAMD64VPSRLVDMasked256, - ssa.OpAMD64VPSRLVDMasked512, - ssa.OpAMD64VPSRLVQMasked128, - ssa.OpAMD64VPSRLVQMasked256, - ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VPSRAVWMasked128, ssa.OpAMD64VPSRAVWMasked256, ssa.OpAMD64VPSRAVWMasked512,
@@ -522,6 +513,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSRAVDMasked128, ssa.OpAMD64VPSRAVDMasked256, ssa.OpAMD64VPSRAVDMasked512, ssa.OpAMD64VPSRAVQMasked128, ssa.OpAMD64VPSRAVQMasked256, ssa.OpAMD64VPSRAVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VSUBPSMasked128, ssa.OpAMD64VSUBPSMasked256, ssa.OpAMD64VSUBPSMasked512,
@@ -845,36 +845,60 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { case ssa.OpAMD64VPSLLW128, ssa.OpAMD64VPSLLW256, + ssa.OpAMD64VPSLLW512, ssa.OpAMD64VPSLLD128, ssa.OpAMD64VPSLLD256, + ssa.OpAMD64VPSLLD512,
ssa.OpAMD64VPSLLQ128, ssa.OpAMD64VPSLLQ256, ssa.OpAMD64VPSLLQ512, - ssa.OpAMD64VPSRLW128, - ssa.OpAMD64VPSRLW256, - ssa.OpAMD64VPSRLD128, - ssa.OpAMD64VPSRLD256, - ssa.OpAMD64VPSRLQ128, - ssa.OpAMD64VPSRLQ256, - ssa.OpAMD64VPSRLQ512, ssa.OpAMD64VPSRAW128, ssa.OpAMD64VPSRAW256, + ssa.OpAMD64VPSRAW512, ssa.OpAMD64VPSRAD128, ssa.OpAMD64VPSRAD256, + ssa.OpAMD64VPSRAD512, ssa.OpAMD64VPSRAQ128, ssa.OpAMD64VPSRAQ256, - ssa.OpAMD64VPSRAQ512: + ssa.OpAMD64VPSRAQ512, + ssa.OpAMD64VPSRLW128, + ssa.OpAMD64VPSRLW256, + ssa.OpAMD64VPSRLW512, + ssa.OpAMD64VPSRLD128, + ssa.OpAMD64VPSRLD256, + ssa.OpAMD64VPSRLD512, + ssa.OpAMD64VPSRLQ128, + ssa.OpAMD64VPSRLQ256, + ssa.OpAMD64VPSRLQ512: p = simdVfpv(s, v) - case ssa.OpAMD64VPSLLQMasked128, + case ssa.OpAMD64VPSLLWMasked128, + ssa.OpAMD64VPSLLWMasked256, + ssa.OpAMD64VPSLLWMasked512, + ssa.OpAMD64VPSLLDMasked128, + ssa.OpAMD64VPSLLDMasked256, + ssa.OpAMD64VPSLLDMasked512, + ssa.OpAMD64VPSLLQMasked128, ssa.OpAMD64VPSLLQMasked256, ssa.OpAMD64VPSLLQMasked512, - ssa.OpAMD64VPSRLQMasked128, - ssa.OpAMD64VPSRLQMasked256, - ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSRAWMasked128, + ssa.OpAMD64VPSRAWMasked256, + ssa.OpAMD64VPSRAWMasked512, + ssa.OpAMD64VPSRADMasked128, + ssa.OpAMD64VPSRADMasked256, + ssa.OpAMD64VPSRADMasked512, ssa.OpAMD64VPSRAQMasked128, ssa.OpAMD64VPSRAQMasked256, - ssa.OpAMD64VPSRAQMasked512: + ssa.OpAMD64VPSRAQMasked512, + ssa.OpAMD64VPSRLWMasked128, + ssa.OpAMD64VPSRLWMasked256, + ssa.OpAMD64VPSRLWMasked512, + ssa.OpAMD64VPSRLDMasked128, + ssa.OpAMD64VPSRLDMasked256, + ssa.OpAMD64VPSRLDMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512: p = simdVfpkv(s, v) case ssa.OpAMD64VPINSRB128, @@ -1198,6 +1222,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHLDQMasked128, ssa.OpAMD64VPSHLDQMasked256, ssa.OpAMD64VPSHLDQMasked512, + ssa.OpAMD64VPSLLWMasked128, + ssa.OpAMD64VPSLLWMasked256, + ssa.OpAMD64VPSLLWMasked512, + ssa.OpAMD64VPSLLDMasked128, + ssa.OpAMD64VPSLLDMasked256, + ssa.OpAMD64VPSLLDMasked512, ssa.OpAMD64VPSLLQMasked128, ssa.OpAMD64VPSLLQMasked256, ssa.OpAMD64VPSLLQMasked512, @@ -1210,12 +1240,24 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHRDQMasked128, ssa.OpAMD64VPSHRDQMasked256, ssa.OpAMD64VPSHRDQMasked512, - ssa.OpAMD64VPSRLQMasked128, - ssa.OpAMD64VPSRLQMasked256, - ssa.OpAMD64VPSRLQMasked512, + ssa.OpAMD64VPSRAWMasked128, + ssa.OpAMD64VPSRAWMasked256, + ssa.OpAMD64VPSRAWMasked512, + ssa.OpAMD64VPSRADMasked128, + ssa.OpAMD64VPSRADMasked256, + ssa.OpAMD64VPSRADMasked512, ssa.OpAMD64VPSRAQMasked128, ssa.OpAMD64VPSRAQMasked256, ssa.OpAMD64VPSRAQMasked512, + ssa.OpAMD64VPSRLWMasked128, + ssa.OpAMD64VPSRLWMasked256, + ssa.OpAMD64VPSRLWMasked512, + ssa.OpAMD64VPSRLDMasked128, + ssa.OpAMD64VPSRLDMasked256, + ssa.OpAMD64VPSRLDMasked512, + ssa.OpAMD64VPSRLQMasked128, + ssa.OpAMD64VPSRLQMasked256, + ssa.OpAMD64VPSRLQMasked512, ssa.OpAMD64VPSHLDVWMasked128, ssa.OpAMD64VPSHLDVWMasked256, ssa.OpAMD64VPSHLDVWMasked512, @@ -1243,15 +1285,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSHRDVQMasked128, ssa.OpAMD64VPSHRDVQMasked256, ssa.OpAMD64VPSHRDVQMasked512, - ssa.OpAMD64VPSRLVWMasked128, - ssa.OpAMD64VPSRLVWMasked256, - ssa.OpAMD64VPSRLVWMasked512, - ssa.OpAMD64VPSRLVDMasked128, - ssa.OpAMD64VPSRLVDMasked256, - ssa.OpAMD64VPSRLVDMasked512, - ssa.OpAMD64VPSRLVQMasked128, - ssa.OpAMD64VPSRLVQMasked256, - ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VPSRAVWMasked128, ssa.OpAMD64VPSRAVWMasked256, 
ssa.OpAMD64VPSRAVWMasked512, @@ -1261,6 +1294,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPSRAVQMasked128, ssa.OpAMD64VPSRAVQMasked256, ssa.OpAMD64VPSRAVQMasked512, + ssa.OpAMD64VPSRLVWMasked128, + ssa.OpAMD64VPSRLVWMasked256, + ssa.OpAMD64VPSRLVWMasked512, + ssa.OpAMD64VPSRLVDMasked128, + ssa.OpAMD64VPSRLVDMasked256, + ssa.OpAMD64VPSRLVDMasked512, + ssa.OpAMD64VPSRLVQMasked128, + ssa.OpAMD64VPSRLVQMasked256, + ssa.OpAMD64VPSRLVQMasked512, ssa.OpAMD64VSQRTPSMasked128, ssa.OpAMD64VSQRTPSMasked256, ssa.OpAMD64VSQRTPSMasked512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 7ac4df5958..6043edad70 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1239,15 +1239,19 @@ (SetElemUint64x2 ...) => (VPINSRQ128 ...) (ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...) (ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftInt16x32 ...) => (VPSLLW512 ...) (ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...) (ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftInt32x16 ...) => (VPSLLD512 ...) (ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...) (ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...) (ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...) (ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...) (ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftUint16x32 ...) => (VPSLLW512 ...) (ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...) (ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftUint32x16 ...) => (VPSLLD512 ...) (ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...) (ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...) (ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...) @@ -1287,23 +1291,39 @@ (ShiftAllLeftAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) (ShiftAllLeftAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) (ShiftAllLeftAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) (ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) (ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) (ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM 
mask)) (ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightInt16x8 ...) => (VPSRLW128 ...) -(ShiftAllRightInt16x16 ...) => (VPSRLW256 ...) -(ShiftAllRightInt32x4 ...) => (VPSRLD128 ...) -(ShiftAllRightInt32x8 ...) => (VPSRLD256 ...) -(ShiftAllRightInt64x2 ...) => (VPSRLQ128 ...) -(ShiftAllRightInt64x4 ...) => (VPSRLQ256 ...) -(ShiftAllRightInt64x8 ...) => (VPSRLQ512 ...) +(ShiftAllRightInt16x8 ...) => (VPSRAW128 ...) +(ShiftAllRightInt16x16 ...) => (VPSRAW256 ...) +(ShiftAllRightInt16x32 ...) => (VPSRAW512 ...) +(ShiftAllRightInt32x4 ...) => (VPSRAD128 ...) +(ShiftAllRightInt32x8 ...) => (VPSRAD256 ...) +(ShiftAllRightInt32x16 ...) => (VPSRAD512 ...) +(ShiftAllRightInt64x2 ...) => (VPSRAQ128 ...) +(ShiftAllRightInt64x4 ...) => (VPSRAQ256 ...) +(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...) (ShiftAllRightUint16x8 ...) => (VPSRLW128 ...) (ShiftAllRightUint16x16 ...) => (VPSRLW256 ...) +(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...) (ShiftAllRightUint32x4 ...) => (VPSRLD128 ...) (ShiftAllRightUint32x8 ...) => (VPSRLD256 ...) +(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...) (ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) (ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) (ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) @@ -1343,22 +1363,24 @@ (ShiftAllRightAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) (ShiftAllRightAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) (ShiftAllRightAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) (ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) (ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightSignExtendedInt16x8 ...) => (VPSRAW128 ...) 
-(ShiftAllRightSignExtendedInt16x16 ...) => (VPSRAW256 ...) -(ShiftAllRightSignExtendedInt32x4 ...) => (VPSRAD128 ...) -(ShiftAllRightSignExtendedInt32x8 ...) => (VPSRAD256 ...) -(ShiftAllRightSignExtendedInt64x2 ...) => (VPSRAQ128 ...) -(ShiftAllRightSignExtendedInt64x4 ...) => (VPSRAQ256 ...) -(ShiftAllRightSignExtendedInt64x8 ...) => (VPSRAQ512 ...) -(ShiftAllRightSignExtendedMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightSignExtendedMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightSignExtendedMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...) @@ -1431,15 +1453,15 @@ (ShiftLeftMaskedUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftLeftMaskedUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM mask)) (ShiftLeftMaskedUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftRightInt16x8 ...) => (VPSRLVW128 ...) -(ShiftRightInt16x16 ...) => (VPSRLVW256 ...) -(ShiftRightInt16x32 ...) => (VPSRLVW512 ...) -(ShiftRightInt32x4 ...) => (VPSRLVD128 ...) -(ShiftRightInt32x8 ...) => (VPSRLVD256 ...) -(ShiftRightInt32x16 ...) => (VPSRLVD512 ...) -(ShiftRightInt64x2 ...) => (VPSRLVQ128 ...) -(ShiftRightInt64x4 ...) => (VPSRLVQ256 ...) -(ShiftRightInt64x8 ...) => (VPSRLVQ512 ...) +(ShiftRightInt16x8 ...) => (VPSRAVW128 ...) +(ShiftRightInt16x16 ...) => (VPSRAVW256 ...) +(ShiftRightInt16x32 ...) => (VPSRAVW512 ...) +(ShiftRightInt32x4 ...) => (VPSRAVD128 ...) +(ShiftRightInt32x8 ...) => (VPSRAVD256 ...) +(ShiftRightInt32x16 ...) => (VPSRAVD512 ...) +(ShiftRightInt64x2 ...) => (VPSRAVQ128 ...) +(ShiftRightInt64x4 ...) => (VPSRAVQ256 ...) +(ShiftRightInt64x8 ...) => (VPSRAVQ512 ...) (ShiftRightUint16x8 ...) => (VPSRLVW128 ...) (ShiftRightUint16x16 ...) => (VPSRLVW256 ...) (ShiftRightUint16x32 ...) => (VPSRLVW512 ...) 
@@ -1485,15 +1507,15 @@ (ShiftRightAndFillUpperFromMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) (ShiftRightAndFillUpperFromMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) (ShiftRightAndFillUpperFromMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftRightMaskedInt16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightMaskedInt16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightMaskedInt16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightMaskedInt32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightMaskedInt32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightMaskedInt32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightMaskedInt64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightMaskedInt64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightMaskedInt64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftRightMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftRightMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftRightMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftRightMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftRightMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftRightMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftRightMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftRightMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftRightMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftRightMaskedUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) (ShiftRightMaskedUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) (ShiftRightMaskedUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) @@ -1503,42 +1525,6 @@ (ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) (ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) (ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftRightSignExtendedInt16x8 ...) => (VPSRAVW128 ...) -(ShiftRightSignExtendedInt16x16 ...) => (VPSRAVW256 ...) -(ShiftRightSignExtendedInt16x32 ...) => (VPSRAVW512 ...) -(ShiftRightSignExtendedInt32x4 ...) => (VPSRAVD128 ...) -(ShiftRightSignExtendedInt32x8 ...) => (VPSRAVD256 ...) -(ShiftRightSignExtendedInt32x16 ...) => (VPSRAVD512 ...) -(ShiftRightSignExtendedInt64x2 ...) => (VPSRAVQ128 ...) -(ShiftRightSignExtendedInt64x4 ...) => (VPSRAVQ256 ...) -(ShiftRightSignExtendedInt64x8 ...) => (VPSRAVQ512 ...) -(ShiftRightSignExtendedUint16x8 ...) => (VPSRAVW128 ...) -(ShiftRightSignExtendedUint16x16 ...) => (VPSRAVW256 ...) -(ShiftRightSignExtendedUint16x32 ...) => (VPSRAVW512 ...) -(ShiftRightSignExtendedUint32x4 ...) => (VPSRAVD128 ...) -(ShiftRightSignExtendedUint32x8 ...) => (VPSRAVD256 ...) -(ShiftRightSignExtendedUint32x16 ...) => (VPSRAVD512 ...) -(ShiftRightSignExtendedUint64x2 ...) => (VPSRAVQ128 ...) -(ShiftRightSignExtendedUint64x4 ...) => (VPSRAVQ256 ...) -(ShiftRightSignExtendedUint64x8 ...) => (VPSRAVQ512 ...) 
-(ShiftRightSignExtendedMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightSignExtendedMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightSignExtendedMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightSignExtendedMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightSignExtendedMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightSignExtendedMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightSignExtendedMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightSignExtendedMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightSignExtendedMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftRightSignExtendedMaskedUint16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftRightSignExtendedMaskedUint16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftRightSignExtendedMaskedUint16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftRightSignExtendedMaskedUint32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftRightSignExtendedMaskedUint32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftRightSignExtendedMaskedUint32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftRightSignExtendedMaskedUint64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftRightSignExtendedMaskedUint64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftRightSignExtendedMaskedUint64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) (SignInt8x16 ...) => (VPSIGNB128 ...) (SignInt8x32 ...) => (VPSIGNB256 ...) (SignInt16x8 ...) => (VPSIGNW128 ...) 
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index f0a149f7d8..3f777db5b7 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -198,17 +198,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPSUBSW256", argLength: 2, reg: v21, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBSWMasked256", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLW256", argLength: 2, reg: vfpv, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLW256", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLWMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAW256", argLength: 2, reg: vfpv, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAWMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVW256", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVW256", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHLDVWMasked256", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLVW256", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVW256", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVW256", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHRDVWMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRAVW256", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGNW256", argLength: 2, reg: v21, asm: "VPSIGNW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBW256", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -233,15 +232,17 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPADDSWMasked512", argLength: 3, reg: w2kw, asm: "VPADDSW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSW512", argLength: 2, reg: w21, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBSWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLW512", argLength: 2, reg: wfpw, asm: "VPSLLW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLWMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAW512", argLength: 2, reg: wfpw, 
asm: "VPSRAW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAWMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLVW512", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVW512", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHLDVWMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSLLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLVW512", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVW512", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVW512", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHRDVWMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRAVW512", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBW512", argLength: 2, reg: w21, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBWMasked512", argLength: 3, reg: w2kw, asm: "VPSUBW", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -272,17 +273,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPSUBSW128", argLength: 2, reg: v21, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBSWMasked128", argLength: 3, reg: w2kw, asm: "VPSUBSW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLW128", argLength: 2, reg: vfpv, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLW128", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLWMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAW128", argLength: 2, reg: vfpv, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAWMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVW128", argLength: 2, reg: w21, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVW128", argLength: 3, reg: w31, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHLDVWMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVW", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLVW128", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVW128", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVW128", argLength: 3, reg: w31, asm: "VPSHRDVW", commutative: false, typ: 
"Vec128", resultInArg0: true}, {name: "VPSHRDVWMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVW", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRAVW128", argLength: 2, reg: w21, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGNW128", argLength: 2, reg: v21, asm: "VPSIGNW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBW128", argLength: 2, reg: v21, asm: "VPSUBW", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -315,15 +315,17 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPDPWSSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDS512", argLength: 3, reg: w31, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPDPBUSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec512", resultInArg0: true}, + {name: "VPSLLD512", argLength: 2, reg: wfpw, asm: "VPSLLD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLDMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAD512", argLength: 2, reg: wfpw, asm: "VPSRAD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRADMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLVD512", argLength: 2, reg: w21, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVD512", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHLDVDMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSLLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLVD512", argLength: 2, reg: w21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVD512", argLength: 2, reg: w21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVD512", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHRDVDMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRAVD512", argLength: 2, reg: w21, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBD512", argLength: 2, reg: w21, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBDMasked512", argLength: 3, reg: w2kw, asm: "VPSUBD", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -362,17 +364,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPDPBUSDS128", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: 
"VPDPBUSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLD128", argLength: 2, reg: vfpv, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAD128", argLength: 2, reg: vfpv, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRADMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVD128", argLength: 2, reg: v21, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVD128", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHLDVDMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVD128", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVD128", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHRDVDMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRAVD128", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSIGND128", argLength: 2, reg: v21, asm: "VPSIGND", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBD128", argLength: 2, reg: v21, asm: "VPSUBD", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -411,17 +412,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPDPBUSDS256", argLength: 3, reg: v31, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPDPBUSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPBUSDS", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLD256", argLength: 2, reg: vfpv, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLDMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAD256", argLength: 2, reg: vfpv, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRADMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVD256", argLength: 2, reg: v21, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVD256", argLength: 3, reg: w31, asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHLDVDMasked256", argLength: 4, reg: w3kw, 
asm: "VPSHLDVD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLVD256", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVD256", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVD256", argLength: 3, reg: w31, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHRDVDMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVD", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRAVD256", argLength: 2, reg: v21, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSIGND256", argLength: 2, reg: v21, asm: "VPSIGND", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBD256", argLength: 2, reg: v21, asm: "VPSUBD", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -453,19 +453,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPRORVQMasked128", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLQ128", argLength: 2, reg: vfpv, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAQ128", argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSLLVQ128", argLength: 2, reg: v21, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDVQ128", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHLDVQMasked128", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSLLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAVQ128", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDVQ128", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, {name: "VPSHRDVQMasked128", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec128", resultInArg0: true}, - {name: "VPSRLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPSRAVQ128", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSRAVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, 
typ: "Vec128", resultInArg0: false}, {name: "VPSUBQ128", argLength: 2, reg: v21, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSUBQMasked128", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -494,19 +490,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPRORVQMasked256", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLQ256", argLength: 2, reg: vfpv, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAQ256", argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSLLVQ256", argLength: 2, reg: v21, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDVQ256", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHLDVQMasked256", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSLLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAVQ256", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDVQ256", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, {name: "VPSHRDVQMasked256", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec256", resultInArg0: true}, - {name: "VPSRLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPSRAVQ256", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSRAVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQ256", argLength: 2, reg: v21, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSUBQMasked256", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec256", resultInArg0: false}, @@ -537,19 +529,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPRORVQMasked512", argLength: 3, reg: w2kw, asm: "VPRORVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLQ512", argLength: 2, reg: wfpw, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSLLQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAQ512", 
argLength: 2, reg: wfpw, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRAQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSLLVQ512", argLength: 2, reg: w21, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDVQ512", argLength: 3, reg: w31, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHLDVQMasked512", argLength: 4, reg: w3kw, asm: "VPSHLDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSLLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSLLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAVQ512", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHRDVQ512", argLength: 3, reg: w31, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, {name: "VPSHRDVQMasked512", argLength: 4, reg: w3kw, asm: "VPSHRDVQ", commutative: false, typ: "Vec512", resultInArg0: true}, - {name: "VPSRLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VPSRAVQ512", argLength: 2, reg: w21, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSRAVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRAVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQ512", argLength: 2, reg: w21, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSUBQMasked512", argLength: 3, reg: w2kw, asm: "VPSUBQ", commutative: false, typ: "Vec512", resultInArg0: false}, @@ -625,6 +613,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUW256", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULHUWMasked256", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLW256", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLWMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVW256", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVWMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPAVGW512", argLength: 2, reg: w21, asm: "VPAVGW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPAVGWMasked512", argLength: 3, reg: w2kw, asm: "VPAVGW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUW512", argLength: 2, reg: w21, asm: "VPMAXUW", commutative: true, typ: "Vec512", resultInArg0: false}, @@ -633,6 +625,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUW512", argLength: 2, reg: w21, asm: "VPMULHUW", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULHUWMasked512", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: 
true, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLW512", argLength: 2, reg: wfpw, asm: "VPSRLW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLWMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVW512", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVWMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPAVGW128", argLength: 2, reg: v21, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGWMasked128", argLength: 3, reg: w2kw, asm: "VPAVGW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUW128", argLength: 2, reg: v21, asm: "VPMAXUW", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -641,36 +637,64 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUW128", argLength: 2, reg: v21, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULHUWMasked128", argLength: 3, reg: w2kw, asm: "VPMULHUW", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLW128", argLength: 2, reg: vfpv, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLWMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVW128", argLength: 2, reg: w21, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVWMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVW", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUD512", argLength: 2, reg: w21, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUD512", argLength: 2, reg: w21, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUDMasked512", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLD512", argLength: 2, reg: wfpw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLDMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVD512", argLength: 2, reg: w21, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVDMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUD128", argLength: 2, reg: v21, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUDMasked128", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUD128", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUDMasked128", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, + 
{name: "VPSRLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVDMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUD256", argLength: 2, reg: v21, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUDMasked256", argLength: 3, reg: w2kw, asm: "VPMAXUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUD256", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUDMasked256", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLDMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVD256", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVDMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUQ128", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUQMasked128", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUQ128", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINUQMasked128", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMULUDQMasked128", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQ128", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVQ128", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLVQMasked128", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXUQ256", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUQMasked256", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUQ256", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMINUQMasked256", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false}, {name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQ256", argLength: 2, reg: vfpv, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQMasked256", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLVQ256", argLength: 2, reg: v21, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: 
"VPSRLVQMasked256", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPMAXUQ512", argLength: 2, reg: w21, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMAXUQMasked512", argLength: 3, reg: w2kw, asm: "VPMAXUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUQ512", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMINUQMasked512", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULUDQ512", argLength: 2, reg: w21, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false}, {name: "VPMULUDQMasked512", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLVQMasked512", argLength: 3, reg: w2kw, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPAVGB128", argLength: 2, reg: v21, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPAVGBMasked128", argLength: 3, reg: w2kw, asm: "VPAVGB", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VGF2P8MULB128", argLength: 2, reg: w21, asm: "VGF2P8MULB", commutative: false, typ: "Vec128", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index d07472b876..1180d32586 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -312,8 +312,9 @@ func simdGenericOps() []opData { {name: "SaturatedSubInt16x16", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftAllRightInt16x16", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedInt16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt16x16", argLength: 3, commutative: false}, {name: "ShiftLeftInt16x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false}, @@ -322,8 +323,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt16x16", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt16x16", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt16x16", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt16x16", argLength: 3, commutative: false}, {name: "SignInt16x16", argLength: 2, commutative: false}, {name: "SubInt16x16", argLength: 2, commutative: false}, {name: "SubMaskedInt16x16", argLength: 3, commutative: false}, @@ -360,6 +359,10 @@ func simdGenericOps() []opData { {name: "SaturatedAddMaskedInt16x32", argLength: 3, commutative: true}, {name: "SaturatedSubInt16x32", argLength: 2, 
commutative: false}, {name: "SaturatedSubMaskedInt16x32", argLength: 3, commutative: false}, + {name: "ShiftAllLeftInt16x32", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt16x32", argLength: 3, commutative: false}, + {name: "ShiftAllRightInt16x32", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt16x32", argLength: 3, commutative: false}, {name: "ShiftLeftInt16x32", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false}, @@ -368,8 +371,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt16x32", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt16x32", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt16x32", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt16x32", argLength: 3, commutative: false}, {name: "SubInt16x32", argLength: 2, commutative: false}, {name: "SubMaskedInt16x32", argLength: 3, commutative: false}, {name: "AbsoluteInt16x8", argLength: 1, commutative: false}, @@ -412,8 +413,9 @@ func simdGenericOps() []opData { {name: "SaturatedSubInt16x8", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftAllRightInt16x8", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedInt16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt16x8", argLength: 3, commutative: false}, {name: "ShiftLeftInt16x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false}, @@ -422,8 +424,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt16x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt16x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt16x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt16x8", argLength: 3, commutative: false}, {name: "SignInt16x8", argLength: 2, commutative: false}, {name: "SubInt16x8", argLength: 2, commutative: false}, {name: "SubMaskedInt16x8", argLength: 3, commutative: false}, @@ -468,6 +468,10 @@ func simdGenericOps() []opData { {name: "SaturatedPairDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, + {name: "ShiftAllLeftInt32x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt32x16", argLength: 3, commutative: false}, + {name: "ShiftAllRightInt32x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt32x16", argLength: 3, commutative: false}, {name: "ShiftLeftInt32x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false}, @@ -476,8 +480,6 @@ func simdGenericOps() []opData { {name: 
"ShiftRightAndFillUpperFromInt32x16", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt32x16", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt32x16", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt32x16", argLength: 3, commutative: false}, {name: "SubInt32x16", argLength: 2, commutative: false}, {name: "SubMaskedInt32x16", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateInt32x16", argLength: 3, commutative: false}, @@ -528,8 +530,9 @@ func simdGenericOps() []opData { {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x4", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4", argLength: 4, commutative: false}, {name: "ShiftAllLeftInt32x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt32x4", argLength: 3, commutative: false}, {name: "ShiftAllRightInt32x4", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedInt32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt32x4", argLength: 3, commutative: false}, {name: "ShiftLeftInt32x4", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false}, @@ -538,8 +541,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt32x4", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt32x4", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt32x4", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt32x4", argLength: 3, commutative: false}, {name: "SignInt32x4", argLength: 2, commutative: false}, {name: "SubInt32x4", argLength: 2, commutative: false}, {name: "SubMaskedInt32x4", argLength: 3, commutative: false}, @@ -591,8 +592,9 @@ func simdGenericOps() []opData { {name: "SaturatedUnsignedSignedQuadDotProdAccumulateInt32x8", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8", argLength: 4, commutative: false}, {name: "ShiftAllLeftInt32x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedInt32x8", argLength: 3, commutative: false}, {name: "ShiftAllRightInt32x8", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedInt32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedInt32x8", argLength: 3, commutative: false}, {name: "ShiftLeftInt32x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false}, @@ -601,8 +603,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt32x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt32x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt32x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt32x8", argLength: 3, commutative: false}, {name: "SignInt32x8", argLength: 2, commutative: false}, {name: "SubInt32x8", argLength: 2, commutative: false}, {name: "SubMaskedInt32x8", argLength: 3, commutative: false}, @@ -650,8 +650,6 @@ func simdGenericOps() 
[]opData { {name: "ShiftAllLeftMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false}, {name: "ShiftAllRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftAllRightSignExtendedInt64x2", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedMaskedInt64x2", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x2", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false}, @@ -660,8 +658,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt64x2", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt64x2", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt64x2", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt64x2", argLength: 3, commutative: false}, {name: "SubInt64x2", argLength: 2, commutative: false}, {name: "SubMaskedInt64x2", argLength: 3, commutative: false}, {name: "XorInt64x2", argLength: 2, commutative: true}, @@ -706,8 +702,6 @@ func simdGenericOps() []opData { {name: "ShiftAllLeftMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false}, {name: "ShiftAllRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftAllRightSignExtendedInt64x4", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedMaskedInt64x4", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x4", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false}, @@ -716,8 +710,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt64x4", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt64x4", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt64x4", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedInt64x4", argLength: 3, commutative: false}, {name: "SubInt64x4", argLength: 2, commutative: false}, {name: "SubMaskedInt64x4", argLength: 3, commutative: false}, {name: "XorInt64x4", argLength: 2, commutative: true}, @@ -762,8 +754,6 @@ func simdGenericOps() []opData { {name: "ShiftAllLeftMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false}, {name: "ShiftAllRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftAllRightSignExtendedInt64x8", argLength: 2, commutative: false}, - {name: "ShiftAllRightSignExtendedMaskedInt64x8", argLength: 3, commutative: false}, {name: "ShiftLeftInt64x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false}, @@ -772,8 +762,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromInt64x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedInt64x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedInt64x8", argLength: 2, commutative: false}, - {name: 
"ShiftRightSignExtendedMaskedInt64x8", argLength: 3, commutative: false}, {name: "SubInt64x8", argLength: 2, commutative: false}, {name: "SubMaskedInt64x8", argLength: 3, commutative: false}, {name: "XorInt64x8", argLength: 2, commutative: true}, @@ -906,7 +894,9 @@ func simdGenericOps() []opData { {name: "SaturatedSubUint16x16", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint16x16", argLength: 3, commutative: false}, {name: "ShiftLeftUint16x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false}, @@ -915,8 +905,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint16x16", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint16x16", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint16x16", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint16x16", argLength: 3, commutative: false}, {name: "SubUint16x16", argLength: 2, commutative: false}, {name: "SubMaskedUint16x16", argLength: 3, commutative: false}, {name: "XorUint16x16", argLength: 2, commutative: true}, @@ -948,6 +936,10 @@ func simdGenericOps() []opData { {name: "SaturatedAddMaskedUint16x32", argLength: 3, commutative: true}, {name: "SaturatedSubUint16x32", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedUint16x32", argLength: 3, commutative: false}, + {name: "ShiftAllLeftUint16x32", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint16x32", argLength: 3, commutative: false}, + {name: "ShiftAllRightUint16x32", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint16x32", argLength: 3, commutative: false}, {name: "ShiftLeftUint16x32", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false}, @@ -956,8 +948,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint16x32", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint16x32", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint16x32", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint16x32", argLength: 3, commutative: false}, {name: "SubUint16x32", argLength: 2, commutative: false}, {name: "SubMaskedUint16x32", argLength: 3, commutative: false}, {name: "AddUint16x8", argLength: 2, commutative: true}, @@ -994,7 +984,9 @@ func simdGenericOps() []opData { {name: "SaturatedSubUint16x8", argLength: 2, commutative: false}, {name: "SaturatedSubMaskedUint16x8", argLength: 3, commutative: false}, {name: "ShiftAllLeftUint16x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint16x8", argLength: 3, commutative: false}, {name: "ShiftAllRightUint16x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint16x8", argLength: 3, commutative: false}, {name: "ShiftLeftUint16x8", argLength: 2, commutative: false}, {name: 
"ShiftLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false}, @@ -1003,8 +995,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint16x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint16x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint16x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint16x8", argLength: 3, commutative: false}, {name: "SubUint16x8", argLength: 2, commutative: false}, {name: "SubMaskedUint16x8", argLength: 3, commutative: false}, {name: "XorUint16x8", argLength: 2, commutative: true}, @@ -1040,6 +1030,10 @@ func simdGenericOps() []opData { {name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16", argLength: 4, commutative: false}, + {name: "ShiftAllLeftUint32x16", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false}, + {name: "ShiftAllRightUint32x16", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint32x16", argLength: 3, commutative: false}, {name: "ShiftLeftUint32x16", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false}, @@ -1048,8 +1042,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint32x16", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint32x16", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint32x16", argLength: 3, commutative: false}, {name: "SubUint32x16", argLength: 2, commutative: false}, {name: "SubMaskedUint32x16", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x16", argLength: 3, commutative: false}, @@ -1092,7 +1084,9 @@ func simdGenericOps() []opData { {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4", argLength: 4, commutative: false}, {name: "ShiftAllLeftUint32x4", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false}, {name: "ShiftAllRightUint32x4", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint32x4", argLength: 3, commutative: false}, {name: "ShiftLeftUint32x4", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false}, @@ -1101,8 +1095,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint32x4", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint32x4", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint32x4", argLength: 3, commutative: false}, {name: "SubUint32x4", 
argLength: 2, commutative: false}, {name: "SubMaskedUint32x4", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x4", argLength: 3, commutative: false}, @@ -1145,7 +1137,9 @@ func simdGenericOps() []opData { {name: "SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false}, {name: "SaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8", argLength: 4, commutative: false}, {name: "ShiftAllLeftUint32x8", argLength: 2, commutative: false}, + {name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false}, {name: "ShiftAllRightUint32x8", argLength: 2, commutative: false}, + {name: "ShiftAllRightMaskedUint32x8", argLength: 3, commutative: false}, {name: "ShiftLeftUint32x8", argLength: 2, commutative: false}, {name: "ShiftLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false}, {name: "ShiftLeftAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false}, @@ -1154,8 +1148,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint32x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint32x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint32x8", argLength: 3, commutative: false}, {name: "SubUint32x8", argLength: 2, commutative: false}, {name: "SubMaskedUint32x8", argLength: 3, commutative: false}, {name: "UnsignedSignedQuadDotProdAccumulateUint32x8", argLength: 3, commutative: false}, @@ -1206,8 +1198,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint64x2", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint64x2", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint64x2", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint64x2", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint64x2", argLength: 3, commutative: false}, {name: "SubUint64x2", argLength: 2, commutative: false}, {name: "SubMaskedUint64x2", argLength: 3, commutative: false}, {name: "XorUint64x2", argLength: 2, commutative: true}, @@ -1256,8 +1246,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint64x4", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint64x4", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint64x4", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint64x4", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint64x4", argLength: 3, commutative: false}, {name: "SubUint64x4", argLength: 2, commutative: false}, {name: "SubMaskedUint64x4", argLength: 3, commutative: false}, {name: "XorUint64x4", argLength: 2, commutative: true}, @@ -1306,8 +1294,6 @@ func simdGenericOps() []opData { {name: "ShiftRightAndFillUpperFromUint64x8", argLength: 3, commutative: false}, {name: "ShiftRightAndFillUpperFromMaskedUint64x8", argLength: 4, commutative: false}, {name: "ShiftRightMaskedUint64x8", argLength: 3, commutative: false}, - {name: "ShiftRightSignExtendedUint64x8", argLength: 2, commutative: false}, - {name: "ShiftRightSignExtendedMaskedUint64x8", argLength: 3, commutative: false}, {name: "SubUint64x8", argLength: 2, commutative: false}, {name: "SubMaskedUint64x8", argLength: 3, commutative: false}, {name: "XorUint64x8", argLength: 2, commutative: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go 
b/src/cmd/compile/internal/ssa/opGen.go index d5c5085949..9067023f3a 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1393,17 +1393,16 @@ const ( OpAMD64VPSUBSW256 OpAMD64VPSUBSWMasked256 OpAMD64VPSLLW256 - OpAMD64VPSRLW256 + OpAMD64VPSLLWMasked256 OpAMD64VPSRAW256 + OpAMD64VPSRAWMasked256 OpAMD64VPSLLVW256 OpAMD64VPSHLDVW256 OpAMD64VPSHLDVWMasked256 OpAMD64VPSLLVWMasked256 - OpAMD64VPSRLVW256 + OpAMD64VPSRAVW256 OpAMD64VPSHRDVW256 OpAMD64VPSHRDVWMasked256 - OpAMD64VPSRLVWMasked256 - OpAMD64VPSRAVW256 OpAMD64VPSRAVWMasked256 OpAMD64VPSIGNW256 OpAMD64VPSUBW256 @@ -1428,15 +1427,17 @@ const ( OpAMD64VPADDSWMasked512 OpAMD64VPSUBSW512 OpAMD64VPSUBSWMasked512 + OpAMD64VPSLLW512 + OpAMD64VPSLLWMasked512 + OpAMD64VPSRAW512 + OpAMD64VPSRAWMasked512 OpAMD64VPSLLVW512 OpAMD64VPSHLDVW512 OpAMD64VPSHLDVWMasked512 OpAMD64VPSLLVWMasked512 - OpAMD64VPSRLVW512 + OpAMD64VPSRAVW512 OpAMD64VPSHRDVW512 OpAMD64VPSHRDVWMasked512 - OpAMD64VPSRLVWMasked512 - OpAMD64VPSRAVW512 OpAMD64VPSRAVWMasked512 OpAMD64VPSUBW512 OpAMD64VPSUBWMasked512 @@ -1467,17 +1468,16 @@ const ( OpAMD64VPSUBSW128 OpAMD64VPSUBSWMasked128 OpAMD64VPSLLW128 - OpAMD64VPSRLW128 + OpAMD64VPSLLWMasked128 OpAMD64VPSRAW128 + OpAMD64VPSRAWMasked128 OpAMD64VPSLLVW128 OpAMD64VPSHLDVW128 OpAMD64VPSHLDVWMasked128 OpAMD64VPSLLVWMasked128 - OpAMD64VPSRLVW128 + OpAMD64VPSRAVW128 OpAMD64VPSHRDVW128 OpAMD64VPSHRDVWMasked128 - OpAMD64VPSRLVWMasked128 - OpAMD64VPSRAVW128 OpAMD64VPSRAVWMasked128 OpAMD64VPSIGNW128 OpAMD64VPSUBW128 @@ -1510,15 +1510,17 @@ const ( OpAMD64VPDPWSSDSMasked512 OpAMD64VPDPBUSDS512 OpAMD64VPDPBUSDSMasked512 + OpAMD64VPSLLD512 + OpAMD64VPSLLDMasked512 + OpAMD64VPSRAD512 + OpAMD64VPSRADMasked512 OpAMD64VPSLLVD512 OpAMD64VPSHLDVD512 OpAMD64VPSHLDVDMasked512 OpAMD64VPSLLVDMasked512 - OpAMD64VPSRLVD512 + OpAMD64VPSRAVD512 OpAMD64VPSHRDVD512 OpAMD64VPSHRDVDMasked512 - OpAMD64VPSRLVDMasked512 - OpAMD64VPSRAVD512 OpAMD64VPSRAVDMasked512 OpAMD64VPSUBD512 OpAMD64VPSUBDMasked512 @@ -1557,17 +1559,16 @@ const ( OpAMD64VPDPBUSDS128 OpAMD64VPDPBUSDSMasked128 OpAMD64VPSLLD128 - OpAMD64VPSRLD128 + OpAMD64VPSLLDMasked128 OpAMD64VPSRAD128 + OpAMD64VPSRADMasked128 OpAMD64VPSLLVD128 OpAMD64VPSHLDVD128 OpAMD64VPSHLDVDMasked128 OpAMD64VPSLLVDMasked128 - OpAMD64VPSRLVD128 + OpAMD64VPSRAVD128 OpAMD64VPSHRDVD128 OpAMD64VPSHRDVDMasked128 - OpAMD64VPSRLVDMasked128 - OpAMD64VPSRAVD128 OpAMD64VPSRAVDMasked128 OpAMD64VPSIGND128 OpAMD64VPSUBD128 @@ -1606,17 +1607,16 @@ const ( OpAMD64VPDPBUSDS256 OpAMD64VPDPBUSDSMasked256 OpAMD64VPSLLD256 - OpAMD64VPSRLD256 + OpAMD64VPSLLDMasked256 OpAMD64VPSRAD256 + OpAMD64VPSRADMasked256 OpAMD64VPSLLVD256 OpAMD64VPSHLDVD256 OpAMD64VPSHLDVDMasked256 OpAMD64VPSLLVDMasked256 - OpAMD64VPSRLVD256 + OpAMD64VPSRAVD256 OpAMD64VPSHRDVD256 OpAMD64VPSHRDVDMasked256 - OpAMD64VPSRLVDMasked256 - OpAMD64VPSRAVD256 OpAMD64VPSRAVDMasked256 OpAMD64VPSIGND256 OpAMD64VPSUBD256 @@ -1648,19 +1648,15 @@ const ( OpAMD64VPRORVQMasked128 OpAMD64VPSLLQ128 OpAMD64VPSLLQMasked128 - OpAMD64VPSRLQ128 - OpAMD64VPSRLQMasked128 OpAMD64VPSRAQ128 OpAMD64VPSRAQMasked128 OpAMD64VPSLLVQ128 OpAMD64VPSHLDVQ128 OpAMD64VPSHLDVQMasked128 OpAMD64VPSLLVQMasked128 - OpAMD64VPSRLVQ128 + OpAMD64VPSRAVQ128 OpAMD64VPSHRDVQ128 OpAMD64VPSHRDVQMasked128 - OpAMD64VPSRLVQMasked128 - OpAMD64VPSRAVQ128 OpAMD64VPSRAVQMasked128 OpAMD64VPSUBQ128 OpAMD64VPSUBQMasked128 @@ -1689,19 +1685,15 @@ const ( OpAMD64VPRORVQMasked256 OpAMD64VPSLLQ256 OpAMD64VPSLLQMasked256 - OpAMD64VPSRLQ256 - OpAMD64VPSRLQMasked256 OpAMD64VPSRAQ256 
OpAMD64VPSRAQMasked256 OpAMD64VPSLLVQ256 OpAMD64VPSHLDVQ256 OpAMD64VPSHLDVQMasked256 OpAMD64VPSLLVQMasked256 - OpAMD64VPSRLVQ256 + OpAMD64VPSRAVQ256 OpAMD64VPSHRDVQ256 OpAMD64VPSHRDVQMasked256 - OpAMD64VPSRLVQMasked256 - OpAMD64VPSRAVQ256 OpAMD64VPSRAVQMasked256 OpAMD64VPSUBQ256 OpAMD64VPSUBQMasked256 @@ -1732,19 +1724,15 @@ const ( OpAMD64VPRORVQMasked512 OpAMD64VPSLLQ512 OpAMD64VPSLLQMasked512 - OpAMD64VPSRLQ512 - OpAMD64VPSRLQMasked512 OpAMD64VPSRAQ512 OpAMD64VPSRAQMasked512 OpAMD64VPSLLVQ512 OpAMD64VPSHLDVQ512 OpAMD64VPSHLDVQMasked512 OpAMD64VPSLLVQMasked512 - OpAMD64VPSRLVQ512 + OpAMD64VPSRAVQ512 OpAMD64VPSHRDVQ512 OpAMD64VPSHRDVQMasked512 - OpAMD64VPSRLVQMasked512 - OpAMD64VPSRAVQ512 OpAMD64VPSRAVQMasked512 OpAMD64VPSUBQ512 OpAMD64VPSUBQMasked512 @@ -1820,6 +1808,10 @@ const ( OpAMD64VPMINUWMasked256 OpAMD64VPMULHUW256 OpAMD64VPMULHUWMasked256 + OpAMD64VPSRLW256 + OpAMD64VPSRLWMasked256 + OpAMD64VPSRLVW256 + OpAMD64VPSRLVWMasked256 OpAMD64VPAVGW512 OpAMD64VPAVGWMasked512 OpAMD64VPMAXUW512 @@ -1828,6 +1820,10 @@ const ( OpAMD64VPMINUWMasked512 OpAMD64VPMULHUW512 OpAMD64VPMULHUWMasked512 + OpAMD64VPSRLW512 + OpAMD64VPSRLWMasked512 + OpAMD64VPSRLVW512 + OpAMD64VPSRLVWMasked512 OpAMD64VPAVGW128 OpAMD64VPAVGWMasked128 OpAMD64VPMAXUW128 @@ -1836,36 +1832,64 @@ const ( OpAMD64VPMINUWMasked128 OpAMD64VPMULHUW128 OpAMD64VPMULHUWMasked128 + OpAMD64VPSRLW128 + OpAMD64VPSRLWMasked128 + OpAMD64VPSRLVW128 + OpAMD64VPSRLVWMasked128 OpAMD64VPMAXUD512 OpAMD64VPMAXUDMasked512 OpAMD64VPMINUD512 OpAMD64VPMINUDMasked512 + OpAMD64VPSRLD512 + OpAMD64VPSRLDMasked512 + OpAMD64VPSRLVD512 + OpAMD64VPSRLVDMasked512 OpAMD64VPMAXUD128 OpAMD64VPMAXUDMasked128 OpAMD64VPMINUD128 OpAMD64VPMINUDMasked128 OpAMD64VPMULUDQ128 + OpAMD64VPSRLD128 + OpAMD64VPSRLDMasked128 + OpAMD64VPSRLVD128 + OpAMD64VPSRLVDMasked128 OpAMD64VPMAXUD256 OpAMD64VPMAXUDMasked256 OpAMD64VPMINUD256 OpAMD64VPMINUDMasked256 OpAMD64VPMULUDQ256 + OpAMD64VPSRLD256 + OpAMD64VPSRLDMasked256 + OpAMD64VPSRLVD256 + OpAMD64VPSRLVDMasked256 OpAMD64VPMAXUQ128 OpAMD64VPMAXUQMasked128 OpAMD64VPMINUQ128 OpAMD64VPMINUQMasked128 OpAMD64VPMULUDQMasked128 + OpAMD64VPSRLQ128 + OpAMD64VPSRLQMasked128 + OpAMD64VPSRLVQ128 + OpAMD64VPSRLVQMasked128 OpAMD64VPMAXUQ256 OpAMD64VPMAXUQMasked256 OpAMD64VPMINUQ256 OpAMD64VPMINUQMasked256 OpAMD64VPMULUDQMasked256 + OpAMD64VPSRLQ256 + OpAMD64VPSRLQMasked256 + OpAMD64VPSRLVQ256 + OpAMD64VPSRLVQMasked256 OpAMD64VPMAXUQ512 OpAMD64VPMAXUQMasked512 OpAMD64VPMINUQ512 OpAMD64VPMINUQMasked512 OpAMD64VPMULUDQ512 OpAMD64VPMULUDQMasked512 + OpAMD64VPSRLQ512 + OpAMD64VPSRLQMasked512 + OpAMD64VPSRLVQ512 + OpAMD64VPSRLVQMasked512 OpAMD64VPAVGB128 OpAMD64VPAVGBMasked128 OpAMD64VGF2P8MULB128 @@ -4604,8 +4628,9 @@ const ( OpSaturatedSubInt16x16 OpSaturatedSubMaskedInt16x16 OpShiftAllLeftInt16x16 + OpShiftAllLeftMaskedInt16x16 OpShiftAllRightInt16x16 - OpShiftAllRightSignExtendedInt16x16 + OpShiftAllRightMaskedInt16x16 OpShiftLeftInt16x16 OpShiftLeftAndFillUpperFromInt16x16 OpShiftLeftAndFillUpperFromMaskedInt16x16 @@ -4614,8 +4639,6 @@ const ( OpShiftRightAndFillUpperFromInt16x16 OpShiftRightAndFillUpperFromMaskedInt16x16 OpShiftRightMaskedInt16x16 - OpShiftRightSignExtendedInt16x16 - OpShiftRightSignExtendedMaskedInt16x16 OpSignInt16x16 OpSubInt16x16 OpSubMaskedInt16x16 @@ -4652,6 +4675,10 @@ const ( OpSaturatedAddMaskedInt16x32 OpSaturatedSubInt16x32 OpSaturatedSubMaskedInt16x32 + OpShiftAllLeftInt16x32 + OpShiftAllLeftMaskedInt16x32 + OpShiftAllRightInt16x32 + OpShiftAllRightMaskedInt16x32 OpShiftLeftInt16x32 
OpShiftLeftAndFillUpperFromInt16x32 OpShiftLeftAndFillUpperFromMaskedInt16x32 @@ -4660,8 +4687,6 @@ const ( OpShiftRightAndFillUpperFromInt16x32 OpShiftRightAndFillUpperFromMaskedInt16x32 OpShiftRightMaskedInt16x32 - OpShiftRightSignExtendedInt16x32 - OpShiftRightSignExtendedMaskedInt16x32 OpSubInt16x32 OpSubMaskedInt16x32 OpAbsoluteInt16x8 @@ -4704,8 +4729,9 @@ const ( OpSaturatedSubInt16x8 OpSaturatedSubMaskedInt16x8 OpShiftAllLeftInt16x8 + OpShiftAllLeftMaskedInt16x8 OpShiftAllRightInt16x8 - OpShiftAllRightSignExtendedInt16x8 + OpShiftAllRightMaskedInt16x8 OpShiftLeftInt16x8 OpShiftLeftAndFillUpperFromInt16x8 OpShiftLeftAndFillUpperFromMaskedInt16x8 @@ -4714,8 +4740,6 @@ const ( OpShiftRightAndFillUpperFromInt16x8 OpShiftRightAndFillUpperFromMaskedInt16x8 OpShiftRightMaskedInt16x8 - OpShiftRightSignExtendedInt16x8 - OpShiftRightSignExtendedMaskedInt16x8 OpSignInt16x8 OpSubInt16x8 OpSubMaskedInt16x8 @@ -4760,6 +4784,10 @@ const ( OpSaturatedPairDotProdAccumulateMaskedInt32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 + OpShiftAllLeftInt32x16 + OpShiftAllLeftMaskedInt32x16 + OpShiftAllRightInt32x16 + OpShiftAllRightMaskedInt32x16 OpShiftLeftInt32x16 OpShiftLeftAndFillUpperFromInt32x16 OpShiftLeftAndFillUpperFromMaskedInt32x16 @@ -4768,8 +4796,6 @@ const ( OpShiftRightAndFillUpperFromInt32x16 OpShiftRightAndFillUpperFromMaskedInt32x16 OpShiftRightMaskedInt32x16 - OpShiftRightSignExtendedInt32x16 - OpShiftRightSignExtendedMaskedInt32x16 OpSubInt32x16 OpSubMaskedInt32x16 OpUnsignedSignedQuadDotProdAccumulateInt32x16 @@ -4820,8 +4846,9 @@ const ( OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x4 OpShiftAllLeftInt32x4 + OpShiftAllLeftMaskedInt32x4 OpShiftAllRightInt32x4 - OpShiftAllRightSignExtendedInt32x4 + OpShiftAllRightMaskedInt32x4 OpShiftLeftInt32x4 OpShiftLeftAndFillUpperFromInt32x4 OpShiftLeftAndFillUpperFromMaskedInt32x4 @@ -4830,8 +4857,6 @@ const ( OpShiftRightAndFillUpperFromInt32x4 OpShiftRightAndFillUpperFromMaskedInt32x4 OpShiftRightMaskedInt32x4 - OpShiftRightSignExtendedInt32x4 - OpShiftRightSignExtendedMaskedInt32x4 OpSignInt32x4 OpSubInt32x4 OpSubMaskedInt32x4 @@ -4883,8 +4908,9 @@ const ( OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedInt32x8 OpShiftAllLeftInt32x8 + OpShiftAllLeftMaskedInt32x8 OpShiftAllRightInt32x8 - OpShiftAllRightSignExtendedInt32x8 + OpShiftAllRightMaskedInt32x8 OpShiftLeftInt32x8 OpShiftLeftAndFillUpperFromInt32x8 OpShiftLeftAndFillUpperFromMaskedInt32x8 @@ -4893,8 +4919,6 @@ const ( OpShiftRightAndFillUpperFromInt32x8 OpShiftRightAndFillUpperFromMaskedInt32x8 OpShiftRightMaskedInt32x8 - OpShiftRightSignExtendedInt32x8 - OpShiftRightSignExtendedMaskedInt32x8 OpSignInt32x8 OpSubInt32x8 OpSubMaskedInt32x8 @@ -4942,8 +4966,6 @@ const ( OpShiftAllLeftMaskedInt64x2 OpShiftAllRightInt64x2 OpShiftAllRightMaskedInt64x2 - OpShiftAllRightSignExtendedInt64x2 - OpShiftAllRightSignExtendedMaskedInt64x2 OpShiftLeftInt64x2 OpShiftLeftAndFillUpperFromInt64x2 OpShiftLeftAndFillUpperFromMaskedInt64x2 @@ -4952,8 +4974,6 @@ const ( OpShiftRightAndFillUpperFromInt64x2 OpShiftRightAndFillUpperFromMaskedInt64x2 OpShiftRightMaskedInt64x2 - OpShiftRightSignExtendedInt64x2 - OpShiftRightSignExtendedMaskedInt64x2 OpSubInt64x2 OpSubMaskedInt64x2 OpXorInt64x2 @@ -4998,8 +5018,6 @@ const ( OpShiftAllLeftMaskedInt64x4 OpShiftAllRightInt64x4 OpShiftAllRightMaskedInt64x4 - 
OpShiftAllRightSignExtendedInt64x4 - OpShiftAllRightSignExtendedMaskedInt64x4 OpShiftLeftInt64x4 OpShiftLeftAndFillUpperFromInt64x4 OpShiftLeftAndFillUpperFromMaskedInt64x4 @@ -5008,8 +5026,6 @@ const ( OpShiftRightAndFillUpperFromInt64x4 OpShiftRightAndFillUpperFromMaskedInt64x4 OpShiftRightMaskedInt64x4 - OpShiftRightSignExtendedInt64x4 - OpShiftRightSignExtendedMaskedInt64x4 OpSubInt64x4 OpSubMaskedInt64x4 OpXorInt64x4 @@ -5054,8 +5070,6 @@ const ( OpShiftAllLeftMaskedInt64x8 OpShiftAllRightInt64x8 OpShiftAllRightMaskedInt64x8 - OpShiftAllRightSignExtendedInt64x8 - OpShiftAllRightSignExtendedMaskedInt64x8 OpShiftLeftInt64x8 OpShiftLeftAndFillUpperFromInt64x8 OpShiftLeftAndFillUpperFromMaskedInt64x8 @@ -5064,8 +5078,6 @@ const ( OpShiftRightAndFillUpperFromInt64x8 OpShiftRightAndFillUpperFromMaskedInt64x8 OpShiftRightMaskedInt64x8 - OpShiftRightSignExtendedInt64x8 - OpShiftRightSignExtendedMaskedInt64x8 OpSubInt64x8 OpSubMaskedInt64x8 OpXorInt64x8 @@ -5198,7 +5210,9 @@ const ( OpSaturatedSubUint16x16 OpSaturatedSubMaskedUint16x16 OpShiftAllLeftUint16x16 + OpShiftAllLeftMaskedUint16x16 OpShiftAllRightUint16x16 + OpShiftAllRightMaskedUint16x16 OpShiftLeftUint16x16 OpShiftLeftAndFillUpperFromUint16x16 OpShiftLeftAndFillUpperFromMaskedUint16x16 @@ -5207,8 +5221,6 @@ const ( OpShiftRightAndFillUpperFromUint16x16 OpShiftRightAndFillUpperFromMaskedUint16x16 OpShiftRightMaskedUint16x16 - OpShiftRightSignExtendedUint16x16 - OpShiftRightSignExtendedMaskedUint16x16 OpSubUint16x16 OpSubMaskedUint16x16 OpXorUint16x16 @@ -5240,6 +5252,10 @@ const ( OpSaturatedAddMaskedUint16x32 OpSaturatedSubUint16x32 OpSaturatedSubMaskedUint16x32 + OpShiftAllLeftUint16x32 + OpShiftAllLeftMaskedUint16x32 + OpShiftAllRightUint16x32 + OpShiftAllRightMaskedUint16x32 OpShiftLeftUint16x32 OpShiftLeftAndFillUpperFromUint16x32 OpShiftLeftAndFillUpperFromMaskedUint16x32 @@ -5248,8 +5264,6 @@ const ( OpShiftRightAndFillUpperFromUint16x32 OpShiftRightAndFillUpperFromMaskedUint16x32 OpShiftRightMaskedUint16x32 - OpShiftRightSignExtendedUint16x32 - OpShiftRightSignExtendedMaskedUint16x32 OpSubUint16x32 OpSubMaskedUint16x32 OpAddUint16x8 @@ -5286,7 +5300,9 @@ const ( OpSaturatedSubUint16x8 OpSaturatedSubMaskedUint16x8 OpShiftAllLeftUint16x8 + OpShiftAllLeftMaskedUint16x8 OpShiftAllRightUint16x8 + OpShiftAllRightMaskedUint16x8 OpShiftLeftUint16x8 OpShiftLeftAndFillUpperFromUint16x8 OpShiftLeftAndFillUpperFromMaskedUint16x8 @@ -5295,8 +5311,6 @@ const ( OpShiftRightAndFillUpperFromUint16x8 OpShiftRightAndFillUpperFromMaskedUint16x8 OpShiftRightMaskedUint16x8 - OpShiftRightSignExtendedUint16x8 - OpShiftRightSignExtendedMaskedUint16x8 OpSubUint16x8 OpSubMaskedUint16x8 OpXorUint16x8 @@ -5332,6 +5346,10 @@ const ( OpRotateRightMaskedUint32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x16 + OpShiftAllLeftUint32x16 + OpShiftAllLeftMaskedUint32x16 + OpShiftAllRightUint32x16 + OpShiftAllRightMaskedUint32x16 OpShiftLeftUint32x16 OpShiftLeftAndFillUpperFromUint32x16 OpShiftLeftAndFillUpperFromMaskedUint32x16 @@ -5340,8 +5358,6 @@ const ( OpShiftRightAndFillUpperFromUint32x16 OpShiftRightAndFillUpperFromMaskedUint32x16 OpShiftRightMaskedUint32x16 - OpShiftRightSignExtendedUint32x16 - OpShiftRightSignExtendedMaskedUint32x16 OpSubUint32x16 OpSubMaskedUint32x16 OpUnsignedSignedQuadDotProdAccumulateUint32x16 @@ -5384,7 +5400,9 @@ const ( OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x4 OpShiftAllLeftUint32x4 + 
OpShiftAllLeftMaskedUint32x4 OpShiftAllRightUint32x4 + OpShiftAllRightMaskedUint32x4 OpShiftLeftUint32x4 OpShiftLeftAndFillUpperFromUint32x4 OpShiftLeftAndFillUpperFromMaskedUint32x4 @@ -5393,8 +5411,6 @@ const ( OpShiftRightAndFillUpperFromUint32x4 OpShiftRightAndFillUpperFromMaskedUint32x4 OpShiftRightMaskedUint32x4 - OpShiftRightSignExtendedUint32x4 - OpShiftRightSignExtendedMaskedUint32x4 OpSubUint32x4 OpSubMaskedUint32x4 OpUnsignedSignedQuadDotProdAccumulateUint32x4 @@ -5437,7 +5453,9 @@ const ( OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 OpSaturatedUnsignedSignedQuadDotProdAccumulateMaskedUint32x8 OpShiftAllLeftUint32x8 + OpShiftAllLeftMaskedUint32x8 OpShiftAllRightUint32x8 + OpShiftAllRightMaskedUint32x8 OpShiftLeftUint32x8 OpShiftLeftAndFillUpperFromUint32x8 OpShiftLeftAndFillUpperFromMaskedUint32x8 @@ -5446,8 +5464,6 @@ const ( OpShiftRightAndFillUpperFromUint32x8 OpShiftRightAndFillUpperFromMaskedUint32x8 OpShiftRightMaskedUint32x8 - OpShiftRightSignExtendedUint32x8 - OpShiftRightSignExtendedMaskedUint32x8 OpSubUint32x8 OpSubMaskedUint32x8 OpUnsignedSignedQuadDotProdAccumulateUint32x8 @@ -5498,8 +5514,6 @@ const ( OpShiftRightAndFillUpperFromUint64x2 OpShiftRightAndFillUpperFromMaskedUint64x2 OpShiftRightMaskedUint64x2 - OpShiftRightSignExtendedUint64x2 - OpShiftRightSignExtendedMaskedUint64x2 OpSubUint64x2 OpSubMaskedUint64x2 OpXorUint64x2 @@ -5548,8 +5562,6 @@ const ( OpShiftRightAndFillUpperFromUint64x4 OpShiftRightAndFillUpperFromMaskedUint64x4 OpShiftRightMaskedUint64x4 - OpShiftRightSignExtendedUint64x4 - OpShiftRightSignExtendedMaskedUint64x4 OpSubUint64x4 OpSubMaskedUint64x4 OpXorUint64x4 @@ -5598,8 +5610,6 @@ const ( OpShiftRightAndFillUpperFromUint64x8 OpShiftRightAndFillUpperFromMaskedUint64x8 OpShiftRightMaskedUint64x8 - OpShiftRightSignExtendedUint64x8 - OpShiftRightSignExtendedMaskedUint64x8 OpSubUint64x8 OpSubMaskedUint64x8 OpXorUint64x8 @@ -21491,16 +21501,17 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLW256", - argLen: 2, - asm: x86.AVPSRLW, + name: "VPSLLWMasked256", + argLen: 3, + asm: x86.AVPSLLW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -21518,6 +21529,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRAWMasked256", + argLen: 3, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVW256", argLen: 2, @@ -21581,9 +21607,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVW256", + name: "VPSRAVW256", argLen: 2, - asm: 
x86.AVPSRLVW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -21627,35 +21653,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVWMasked256", - argLen: 3, - asm: x86.AVPSRLVW, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVW256", - argLen: 2, - asm: x86.AVPSRAVW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVWMasked256", argLen: 3, @@ -22012,6 +22009,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLW512", + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSLLWMasked512", + argLen: 3, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAW512", + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAWMasked512", + argLen: 3, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVW512", argLen: 2, @@ -22075,9 +22130,9 @@ var opcodeTable = [...]opInfo{ }, }, { - 
name: "VPSRLVW512", + name: "VPSRAVW512", argLen: 2, - asm: x86.AVPSRLVW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -22121,35 +22176,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVWMasked512", - argLen: 3, - asm: x86.AVPSRLVW, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVW512", - argLen: 2, - asm: x86.AVPSRAVW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVWMasked512", argLen: 3, @@ -22592,16 +22618,17 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLW128", - argLen: 2, - asm: x86.AVPSRLW, + name: "VPSLLWMasked128", + argLen: 3, + asm: x86.AVPSLLW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -22619,6 +22646,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRAWMasked128", + argLen: 3, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVW128", argLen: 2, @@ -22682,9 +22724,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVW128", + name: "VPSRAVW128", argLen: 2, - asm: x86.AVPSRLVW, + asm: x86.AVPSRAVW, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -22728,35 +22770,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVWMasked128", - argLen: 3, - asm: x86.AVPSRLVW, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVW128", - argLen: 2, - asm: x86.AVPSRAVW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVWMasked128", argLen: 3, @@ -23241,6 +23254,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLD512", + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSLLDMasked512", + argLen: 3, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAD512", + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRADMasked512", + argLen: 3, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVD512", argLen: 2, @@ -23304,9 +23375,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVD512", + name: "VPSRAVD512", argLen: 2, - asm: x86.AVPSRLVD, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -23350,35 +23421,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVDMasked512", - argLen: 3, - asm: x86.AVPSRLVD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 
X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVD512", - argLen: 2, - asm: x86.AVPSRAVD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVDMasked512", argLen: 3, @@ -23956,16 +23998,17 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLD128", - argLen: 2, - asm: x86.AVPSRLD, + name: "VPSLLDMasked128", + argLen: 3, + asm: x86.AVPSLLD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -23983,6 +24026,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRADMasked128", + argLen: 3, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVD128", argLen: 2, @@ -24046,9 +24104,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVD128", + name: "VPSRAVD128", argLen: 2, - asm: x86.AVPSRLVD, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24092,35 +24150,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVDMasked128", - argLen: 3, - asm: x86.AVPSRLVD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVD128", - argLen: 2, - asm: x86.AVPSRAVD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, { name: "VPSRAVDMasked128", argLen: 3, @@ -24697,16 +24726,17 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLD256", - 
argLen: 2, - asm: x86.AVPSRLD, + name: "VPSLLDMasked256", + argLen: 3, + asm: x86.AVPSLLD, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -24724,6 +24754,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRADMasked256", + argLen: 3, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSLLVD256", argLen: 2, @@ -24787,9 +24832,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVD256", + name: "VPSRAVD256", argLen: 2, - asm: x86.AVPSRLVD, + asm: x86.AVPSRAVD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -24833,35 +24878,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVDMasked256", - argLen: 3, - asm: x86.AVPSRLVD, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVD256", - argLen: 2, - asm: x86.AVPSRAVD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, { name: "VPSRAVDMasked256", argLen: 3, @@ -25326,35 +25342,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLQ128", - argLen: 2, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRLQMasked128", - argLen: 3, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { 
name: "VPSRAQ128", argLen: 2, @@ -25447,16 +25434,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ128", + name: "VPSRAVQ128", argLen: 2, - asm: x86.AVPSRLVQ, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -25493,35 +25480,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVQMasked128", - argLen: 3, - asm: x86.AVPSRLVQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVQ128", - argLen: 2, - asm: x86.AVPSRAVQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVQMasked128", argLen: 3, @@ -25939,35 +25897,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLQ256", - argLen: 2, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRLQMasked256", - argLen: 3, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAQ256", argLen: 2, @@ -26060,16 +25989,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ256", + name: "VPSRAVQ256", argLen: 2, - asm: x86.AVPSRLVQ, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 
X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -26106,35 +26035,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVQMasked256", - argLen: 3, - asm: x86.AVPSRLVQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVQ256", - argLen: 2, - asm: x86.AVPSRAVQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVQMasked256", argLen: 3, @@ -26582,35 +26482,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLQ512", - argLen: 2, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, - { - name: "VPSRLQMasked512", - argLen: 3, - asm: x86.AVPSRLQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAQ512", argLen: 2, @@ -26703,9 +26574,9 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPSRLVQ512", + name: "VPSRAVQ512", argLen: 2, - asm: x86.AVPSRLVQ, + asm: x86.AVPSRAVQ, reg: regInfo{ inputs: []inputInfo{ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 @@ -26749,35 +26620,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPSRLVQMasked512", - argLen: 3, - asm: x86.AVPSRLVQ, - reg: regInfo{ - inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - }, - }, - { - name: "VPSRAVQ512", - argLen: 2, - asm: x86.AVPSRAVQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 
X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - outputs: []outputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 - }, - }, - }, { name: "VPSRAVQMasked512", argLen: 3, @@ -27889,6 +27731,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLW256", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLWMasked256", + argLen: 3, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVW256", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVWMasked256", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPAVGW512", argLen: 2, @@ -28013,6 +27913,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLW512", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLWMasked512", + argLen: 3, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVW512", + argLen: 2, + 
asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVWMasked512", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPAVGW128", argLen: 2, @@ -28137,6 +28095,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLW128", + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLWMasked128", + argLen: 3, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVW128", + argLen: 2, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVWMasked128", + argLen: 3, + asm: x86.AVPSRLVW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUD512", argLen: 2, @@ -28199,6 +28215,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLD512", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: 
"VPSRLDMasked512", + argLen: 3, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVD512", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVDMasked512", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUD128", argLen: 2, @@ -28276,6 +28350,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLD128", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLDMasked128", + argLen: 3, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVD128", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked128", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUD256", argLen: 2, @@ -28353,6 +28485,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLD256", + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 
X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLDMasked256", + argLen: 3, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVD256", + argLen: 2, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVDMasked256", + argLen: 3, + asm: x86.AVPSRLVD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUQ128", argLen: 2, @@ -28431,6 +28621,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLQ128", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked128", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVQ128", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked128", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUQ256", argLen: 2, @@ -28509,6 +28757,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLQ256", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 
X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked256", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVQ256", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLVQMasked256", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXUQ512", argLen: 2, @@ -28602,6 +28908,64 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSRLQ512", + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLQMasked512", + argLen: 3, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVQ512", + argLen: 2, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLVQMasked512", + argLen: 3, + asm: x86.AVPSRLVQ, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 
X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPAVGB128", argLen: 2, @@ -60515,14 +60879,19 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedInt16x16", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt16x16", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt16x16", - argLen: 2, + name: "ShiftAllRightMaskedInt16x16", + argLen: 3, generic: true, }, { @@ -60565,16 +60934,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt16x16", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt16x16", - argLen: 3, - generic: true, - }, { name: "SignInt16x16", argLen: 2, @@ -60772,6 +61131,26 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftAllLeftInt16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftMaskedInt16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftAllRightInt16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightMaskedInt16x32", + argLen: 3, + generic: true, + }, { name: "ShiftLeftInt16x32", argLen: 2, @@ -60812,16 +61191,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt16x32", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt16x32", - argLen: 3, - generic: true, - }, { name: "SubInt16x32", argLen: 2, @@ -61050,14 +61419,19 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedInt16x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt16x8", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt16x8", - argLen: 2, + name: "ShiftAllRightMaskedInt16x8", + argLen: 3, generic: true, }, { @@ -61100,16 +61474,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt16x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt16x8", - argLen: 3, - generic: true, - }, { name: "SignInt16x8", argLen: 2, @@ -61347,6 +61711,26 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "ShiftAllLeftInt32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftMaskedInt32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftAllRightInt32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightMaskedInt32x16", + argLen: 3, + generic: true, + }, { name: "ShiftLeftInt32x16", argLen: 2, @@ -61387,16 +61771,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt32x16", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt32x16", - argLen: 3, - generic: true, - }, { name: "SubInt32x16", argLen: 2, @@ -61666,14 +62040,19 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedInt32x4", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt32x4", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt32x4", - argLen: 2, + name: "ShiftAllRightMaskedInt32x4", + argLen: 3, generic: true, }, { @@ -61716,16 +62095,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt32x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt32x4", - argLen: 3, - generic: true, - }, { name: "SignInt32x4", argLen: 2, @@ -62000,14 +62369,19 @@ var opcodeTable = 
[...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedInt32x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightInt32x8", argLen: 2, generic: true, }, { - name: "ShiftAllRightSignExtendedInt32x8", - argLen: 2, + name: "ShiftAllRightMaskedInt32x8", + argLen: 3, generic: true, }, { @@ -62050,16 +62424,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt32x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt32x8", - argLen: 3, - generic: true, - }, { name: "SignInt32x8", argLen: 2, @@ -62315,16 +62679,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftAllRightSignExtendedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "ShiftAllRightSignExtendedMaskedInt64x2", - argLen: 3, - generic: true, - }, { name: "ShiftLeftInt64x2", argLen: 2, @@ -62365,16 +62719,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt64x2", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt64x2", - argLen: 3, - generic: true, - }, { name: "SubInt64x2", argLen: 2, @@ -62615,16 +62959,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftAllRightSignExtendedInt64x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftAllRightSignExtendedMaskedInt64x4", - argLen: 3, - generic: true, - }, { name: "ShiftLeftInt64x4", argLen: 2, @@ -62665,16 +62999,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt64x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt64x4", - argLen: 3, - generic: true, - }, { name: "SubInt64x4", argLen: 2, @@ -62915,16 +63239,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftAllRightSignExtendedInt64x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftAllRightSignExtendedMaskedInt64x8", - argLen: 3, - generic: true, - }, { name: "ShiftLeftInt64x8", argLen: 2, @@ -62965,16 +63279,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedInt64x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedInt64x8", - argLen: 3, - generic: true, - }, { name: "SubInt64x8", argLen: 2, @@ -63697,11 +64001,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint16x16", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint16x16", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint16x16", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint16x16", argLen: 2, @@ -63742,16 +64056,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint16x16", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint16x16", - argLen: 3, - generic: true, - }, { name: "SubUint16x16", argLen: 2, @@ -63924,6 +64228,26 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "ShiftAllLeftUint16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftMaskedUint16x32", + argLen: 3, + generic: true, + }, + { + name: "ShiftAllRightUint16x32", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightMaskedUint16x32", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint16x32", argLen: 2, @@ -63964,16 +64288,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint16x32", - argLen: 2, - generic: 
true, - }, - { - name: "ShiftRightSignExtendedMaskedUint16x32", - argLen: 3, - generic: true, - }, { name: "SubUint16x32", argLen: 2, @@ -64172,11 +64486,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint16x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint16x8", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint16x8", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint16x8", argLen: 2, @@ -64217,16 +64541,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint16x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint16x8", - argLen: 3, - generic: true, - }, { name: "SubUint16x8", argLen: 2, @@ -64417,6 +64731,26 @@ var opcodeTable = [...]opInfo{ argLen: 4, generic: true, }, + { + name: "ShiftAllLeftUint32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllLeftMaskedUint32x16", + argLen: 3, + generic: true, + }, + { + name: "ShiftAllRightUint32x16", + argLen: 2, + generic: true, + }, + { + name: "ShiftAllRightMaskedUint32x16", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint32x16", argLen: 2, @@ -64457,16 +64791,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint32x16", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint32x16", - argLen: 3, - generic: true, - }, { name: "SubUint32x16", argLen: 2, @@ -64694,11 +65018,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint32x4", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint32x4", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint32x4", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint32x4", argLen: 2, @@ -64739,16 +65073,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint32x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint32x4", - argLen: 3, - generic: true, - }, { name: "SubUint32x4", argLen: 2, @@ -64976,11 +65300,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "ShiftAllLeftMaskedUint32x8", + argLen: 3, + generic: true, + }, { name: "ShiftAllRightUint32x8", argLen: 2, generic: true, }, + { + name: "ShiftAllRightMaskedUint32x8", + argLen: 3, + generic: true, + }, { name: "ShiftLeftUint32x8", argLen: 2, @@ -65021,16 +65355,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint32x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint32x8", - argLen: 3, - generic: true, - }, { name: "SubUint32x8", argLen: 2, @@ -65299,16 +65623,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint64x2", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint64x2", - argLen: 3, - generic: true, - }, { name: "SubUint64x2", argLen: 2, @@ -65567,16 +65881,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint64x4", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint64x4", - argLen: 3, - generic: true, - }, { name: "SubUint64x4", argLen: 2, @@ -65835,16 +66139,6 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "ShiftRightSignExtendedUint64x8", - argLen: 2, - generic: true, - }, - { - name: "ShiftRightSignExtendedMaskedUint64x8", - argLen: 3, - 
generic: true, - }, { name: "SubUint64x8", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index d258b3bd0e..d78c9212cb 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -4131,9 +4131,15 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftInt16x16: v.Op = OpAMD64VPSLLW256 return true + case OpShiftAllLeftInt16x32: + v.Op = OpAMD64VPSLLW512 + return true case OpShiftAllLeftInt16x8: v.Op = OpAMD64VPSLLW128 return true + case OpShiftAllLeftInt32x16: + v.Op = OpAMD64VPSLLD512 + return true case OpShiftAllLeftInt32x4: v.Op = OpAMD64VPSLLD128 return true @@ -4149,12 +4155,36 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftInt64x8: v.Op = OpAMD64VPSLLQ512 return true + case OpShiftAllLeftMaskedInt16x16: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v) + case OpShiftAllLeftMaskedInt16x32: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v) + case OpShiftAllLeftMaskedInt16x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v) + case OpShiftAllLeftMaskedInt32x16: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v) + case OpShiftAllLeftMaskedInt32x4: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v) + case OpShiftAllLeftMaskedInt32x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v) case OpShiftAllLeftMaskedInt64x2: return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v) case OpShiftAllLeftMaskedInt64x4: return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v) case OpShiftAllLeftMaskedInt64x8: return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v) + case OpShiftAllLeftMaskedUint16x16: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v) + case OpShiftAllLeftMaskedUint16x32: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v) + case OpShiftAllLeftMaskedUint16x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v) + case OpShiftAllLeftMaskedUint32x16: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v) + case OpShiftAllLeftMaskedUint32x4: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v) + case OpShiftAllLeftMaskedUint32x8: + return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v) case OpShiftAllLeftMaskedUint64x2: return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v) case OpShiftAllLeftMaskedUint64x4: @@ -4164,9 +4194,15 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftUint16x16: v.Op = OpAMD64VPSLLW256 return true + case OpShiftAllLeftUint16x32: + v.Op = OpAMD64VPSLLW512 + return true case OpShiftAllLeftUint16x8: v.Op = OpAMD64VPSLLW128 return true + case OpShiftAllLeftUint32x16: + v.Op = OpAMD64VPSLLD512 + return true case OpShiftAllLeftUint32x4: v.Op = OpAMD64VPSLLD128 return true @@ -4273,71 +4309,80 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPSHRDQ512 return true case OpShiftAllRightInt16x16: - v.Op = OpAMD64VPSRLW256 + v.Op = OpAMD64VPSRAW256 + return true + case OpShiftAllRightInt16x32: + v.Op = OpAMD64VPSRAW512 return true case OpShiftAllRightInt16x8: - v.Op = OpAMD64VPSRLW128 + v.Op = OpAMD64VPSRAW128 + return true + case OpShiftAllRightInt32x16: + v.Op = OpAMD64VPSRAD512 return true case OpShiftAllRightInt32x4: - v.Op = OpAMD64VPSRLD128 + v.Op = OpAMD64VPSRAD128 return true case OpShiftAllRightInt32x8: - v.Op = OpAMD64VPSRLD256 + v.Op = OpAMD64VPSRAD256 return true case OpShiftAllRightInt64x2: - v.Op = OpAMD64VPSRLQ128 + v.Op = OpAMD64VPSRAQ128 return true case OpShiftAllRightInt64x4: - v.Op = OpAMD64VPSRLQ256 + v.Op = 
OpAMD64VPSRAQ256 return true case OpShiftAllRightInt64x8: - v.Op = OpAMD64VPSRLQ512 + v.Op = OpAMD64VPSRAQ512 return true + case OpShiftAllRightMaskedInt16x16: + return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v) + case OpShiftAllRightMaskedInt16x32: + return rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v) + case OpShiftAllRightMaskedInt16x8: + return rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v) + case OpShiftAllRightMaskedInt32x16: + return rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v) + case OpShiftAllRightMaskedInt32x4: + return rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v) + case OpShiftAllRightMaskedInt32x8: + return rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v) case OpShiftAllRightMaskedInt64x2: return rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v) case OpShiftAllRightMaskedInt64x4: return rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v) case OpShiftAllRightMaskedInt64x8: return rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v) + case OpShiftAllRightMaskedUint16x16: + return rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v) + case OpShiftAllRightMaskedUint16x32: + return rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v) + case OpShiftAllRightMaskedUint16x8: + return rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v) + case OpShiftAllRightMaskedUint32x16: + return rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v) + case OpShiftAllRightMaskedUint32x4: + return rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v) + case OpShiftAllRightMaskedUint32x8: + return rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v) case OpShiftAllRightMaskedUint64x2: return rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v) case OpShiftAllRightMaskedUint64x4: return rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v) case OpShiftAllRightMaskedUint64x8: return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v) - case OpShiftAllRightSignExtendedInt16x16: - v.Op = OpAMD64VPSRAW256 - return true - case OpShiftAllRightSignExtendedInt16x8: - v.Op = OpAMD64VPSRAW128 - return true - case OpShiftAllRightSignExtendedInt32x4: - v.Op = OpAMD64VPSRAD128 - return true - case OpShiftAllRightSignExtendedInt32x8: - v.Op = OpAMD64VPSRAD256 - return true - case OpShiftAllRightSignExtendedInt64x2: - v.Op = OpAMD64VPSRAQ128 - return true - case OpShiftAllRightSignExtendedInt64x4: - v.Op = OpAMD64VPSRAQ256 - return true - case OpShiftAllRightSignExtendedInt64x8: - v.Op = OpAMD64VPSRAQ512 - return true - case OpShiftAllRightSignExtendedMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x2(v) - case OpShiftAllRightSignExtendedMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x4(v) - case OpShiftAllRightSignExtendedMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x8(v) case OpShiftAllRightUint16x16: v.Op = OpAMD64VPSRLW256 return true + case OpShiftAllRightUint16x32: + v.Op = OpAMD64VPSRLW512 + return true case OpShiftAllRightUint16x8: v.Op = OpAMD64VPSRLW128 return true + case OpShiftAllRightUint32x16: + v.Op = OpAMD64VPSRLD512 + return true case OpShiftAllRightUint32x4: v.Op = OpAMD64VPSRLD128 return true @@ -4624,31 +4669,31 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPSHRDVQ512 return true case OpShiftRightInt16x16: - v.Op = OpAMD64VPSRLVW256 + v.Op = OpAMD64VPSRAVW256 return true case OpShiftRightInt16x32: - v.Op = OpAMD64VPSRLVW512 + v.Op = OpAMD64VPSRAVW512 return true case OpShiftRightInt16x8: - v.Op = OpAMD64VPSRLVW128 + v.Op = OpAMD64VPSRAVW128 return true case OpShiftRightInt32x16: - 
v.Op = OpAMD64VPSRLVD512 + v.Op = OpAMD64VPSRAVD512 return true case OpShiftRightInt32x4: - v.Op = OpAMD64VPSRLVD128 + v.Op = OpAMD64VPSRAVD128 return true case OpShiftRightInt32x8: - v.Op = OpAMD64VPSRLVD256 + v.Op = OpAMD64VPSRAVD256 return true case OpShiftRightInt64x2: - v.Op = OpAMD64VPSRLVQ128 + v.Op = OpAMD64VPSRAVQ128 return true case OpShiftRightInt64x4: - v.Op = OpAMD64VPSRLVQ256 + v.Op = OpAMD64VPSRAVQ256 return true case OpShiftRightInt64x8: - v.Op = OpAMD64VPSRLVQ512 + v.Op = OpAMD64VPSRAVQ512 return true case OpShiftRightMaskedInt16x16: return rewriteValueAMD64_OpShiftRightMaskedInt16x16(v) @@ -4686,96 +4731,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpShiftRightMaskedUint64x4(v) case OpShiftRightMaskedUint64x8: return rewriteValueAMD64_OpShiftRightMaskedUint64x8(v) - case OpShiftRightSignExtendedInt16x16: - v.Op = OpAMD64VPSRAVW256 - return true - case OpShiftRightSignExtendedInt16x32: - v.Op = OpAMD64VPSRAVW512 - return true - case OpShiftRightSignExtendedInt16x8: - v.Op = OpAMD64VPSRAVW128 - return true - case OpShiftRightSignExtendedInt32x16: - v.Op = OpAMD64VPSRAVD512 - return true - case OpShiftRightSignExtendedInt32x4: - v.Op = OpAMD64VPSRAVD128 - return true - case OpShiftRightSignExtendedInt32x8: - v.Op = OpAMD64VPSRAVD256 - return true - case OpShiftRightSignExtendedInt64x2: - v.Op = OpAMD64VPSRAVQ128 - return true - case OpShiftRightSignExtendedInt64x4: - v.Op = OpAMD64VPSRAVQ256 - return true - case OpShiftRightSignExtendedInt64x8: - v.Op = OpAMD64VPSRAVQ512 - return true - case OpShiftRightSignExtendedMaskedInt16x16: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x16(v) - case OpShiftRightSignExtendedMaskedInt16x32: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x32(v) - case OpShiftRightSignExtendedMaskedInt16x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x8(v) - case OpShiftRightSignExtendedMaskedInt32x16: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x16(v) - case OpShiftRightSignExtendedMaskedInt32x4: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x4(v) - case OpShiftRightSignExtendedMaskedInt32x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x8(v) - case OpShiftRightSignExtendedMaskedInt64x2: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x2(v) - case OpShiftRightSignExtendedMaskedInt64x4: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x4(v) - case OpShiftRightSignExtendedMaskedInt64x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x8(v) - case OpShiftRightSignExtendedMaskedUint16x16: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x16(v) - case OpShiftRightSignExtendedMaskedUint16x32: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x32(v) - case OpShiftRightSignExtendedMaskedUint16x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x8(v) - case OpShiftRightSignExtendedMaskedUint32x16: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x16(v) - case OpShiftRightSignExtendedMaskedUint32x4: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x4(v) - case OpShiftRightSignExtendedMaskedUint32x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x8(v) - case OpShiftRightSignExtendedMaskedUint64x2: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x2(v) - case OpShiftRightSignExtendedMaskedUint64x4: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x4(v) - case 
OpShiftRightSignExtendedMaskedUint64x8: - return rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x8(v) - case OpShiftRightSignExtendedUint16x16: - v.Op = OpAMD64VPSRAVW256 - return true - case OpShiftRightSignExtendedUint16x32: - v.Op = OpAMD64VPSRAVW512 - return true - case OpShiftRightSignExtendedUint16x8: - v.Op = OpAMD64VPSRAVW128 - return true - case OpShiftRightSignExtendedUint32x16: - v.Op = OpAMD64VPSRAVD512 - return true - case OpShiftRightSignExtendedUint32x4: - v.Op = OpAMD64VPSRAVD128 - return true - case OpShiftRightSignExtendedUint32x8: - v.Op = OpAMD64VPSRAVD256 - return true - case OpShiftRightSignExtendedUint64x2: - v.Op = OpAMD64VPSRAVQ128 - return true - case OpShiftRightSignExtendedUint64x4: - v.Op = OpAMD64VPSRAVQ256 - return true - case OpShiftRightSignExtendedUint64x8: - v.Op = OpAMD64VPSRAVQ512 - return true case OpShiftRightUint16x16: v.Op = OpAMD64VPSRLVW256 return true @@ -48631,6 +48586,114 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x8(v *Value) bo return true } } +func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt16x16 x y mask) + // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt16x32 x y mask) + // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt16x8 x y mask) + // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt32x16 x y mask) + // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt32x4 x y mask) + // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedInt32x8 x y mask) + // result: (VPSLLDMasked256 x y 
(VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -48685,6 +48748,114 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool { return true } } +func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint16x16 x y mask) + // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint16x32 x y mask) + // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint16x8 x y mask) + // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint32x16 x y mask) + // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint32x4 x y mask) + // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftMaskedUint32x8 x y mask) + // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -49099,18 +49270,126 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x8(v *Value) b return true } } +func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := 
v.Block + // match: (ShiftAllRightMaskedInt16x16 x y mask) + // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt16x32 x y mask) + // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt16x8 x y mask) + // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRAWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt32x16 x y mask) + // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRADMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt32x4 x y mask) + // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRADMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedInt32x8 x y mask) + // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRADMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block // match: (ShiftAllRightMaskedInt64x2 x y mask) - // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked128) + v.reset(OpAMD64VPSRAQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -49123,12 +49402,12 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftAllRightMaskedInt64x4 x y mask) - // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked256) + v.reset(OpAMD64VPSRAQMasked256) v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -49141,120 +49420,174 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftAllRightMaskedInt64x8 x y mask) - // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked512) + v.reset(OpAMD64VPSRAQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightMaskedUint64x2 x y mask) - // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (ShiftAllRightMaskedUint16x16 x y mask) + // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPSRLWMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightMaskedUint64x4 x y mask) - // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (ShiftAllRightMaskedUint16x32 x y mask) + // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSRLWMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightMaskedUint64x8 x y mask) - // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (ShiftAllRightMaskedUint16x8 x y mask) + // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPSRLWMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightSignExtendedMaskedInt64x2 x y mask) - // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (ShiftAllRightMaskedUint32x16 x y mask) + // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked128) + v.reset(OpAMD64VPSRLDMasked512) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := 
v.Block + // match: (ShiftAllRightMaskedUint32x4 x y mask) + // result: (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLDMasked128) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedUint32x8 x y mask) + // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLDMasked256) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightMaskedUint64x2 x y mask) + // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) + for { + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightSignExtendedMaskedInt64x4 x y mask) - // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (ShiftAllRightMaskedUint64x4 x y mask) + // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked256) + v.reset(OpAMD64VPSRLQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllRightSignExtendedMaskedInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightSignExtendedMaskedInt64x8 x y mask) - // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (ShiftAllRightMaskedUint64x8 x y mask) + // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRAQMasked512) + v.reset(OpAMD64VPSRLQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50311,12 +50644,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt16x16(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt16x16 x y mask) - // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM mask)) + // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked256) + v.reset(OpAMD64VPSRAVWMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50329,12 +50662,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt16x32(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt16x32 x y mask) - // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM mask)) + // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked512) + v.reset(OpAMD64VPSRAVWMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50347,12 
+50680,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt16x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt16x8 x y mask) - // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM mask)) + // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVWMasked128) + v.reset(OpAMD64VPSRAVWMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50365,12 +50698,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt32x16(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt32x16 x y mask) - // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM mask)) + // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512) + v.reset(OpAMD64VPSRAVDMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50383,12 +50716,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt32x4(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt32x4 x y mask) - // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM mask)) + // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked128) + v.reset(OpAMD64VPSRAVDMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50401,12 +50734,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt32x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt32x8 x y mask) - // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM mask)) + // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVDMasked256) + v.reset(OpAMD64VPSRAVDMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50419,12 +50752,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt64x2(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt64x2 x y mask) - // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM mask)) + // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked128) + v.reset(OpAMD64VPSRAVQMasked128) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50437,12 +50770,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt64x4(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt64x4 x y mask) - // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM mask)) + // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked256) + v.reset(OpAMD64VPSRAVQMasked256) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50455,12 +50788,12 @@ func rewriteValueAMD64_OpShiftRightMaskedInt64x8(v *Value) bool { v_0 := v.Args[0] b := v.Block // match: (ShiftRightMaskedInt64x8 x y mask) - // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM mask)) + // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) for { x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512) + v.reset(OpAMD64VPSRAVQMasked512) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) @@ -50629,330 +50962,6 @@ func rewriteValueAMD64_OpShiftRightMaskedUint64x8(v *Value) bool { return true } } -func 
rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt16x16 x y mask) - // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt16x32 x y mask) - // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt16x8 x y mask) - // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt32x16 x y mask) - // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt32x4 x y mask) - // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt32x8 x y mask) - // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt64x2 x y mask) - // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt64x4 x y mask) - // 
result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedInt64x8 x y mask) - // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint16x16 x y mask) - // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint16x32 x y mask) - // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint16x8 x y mask) - // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVWMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint32x16 x y mask) - // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint32x4 x y mask) - // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint32x8 x y mask) - // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVDMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, 
types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint64x2 x y mask) - // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint64x4 x y mask) - // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftRightSignExtendedMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftRightSignExtendedMaskedUint64x8 x y mask) - // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} func rewriteValueAMD64_OpSlicemask(v *Value) bool { v_0 := v.Args[0] b := v.Block diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index ffd341d6ab..085c0b8d99 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -1250,15 +1250,19 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
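// NOTE (illustrative aside, not part of the generated patch): the rewrite
// rules above pick the instruction by element signedness, mirroring Go's
// scalar shift semantics — signed lanes lower to the arithmetic VPSRA*Masked
// forms, unsigned lanes to the logical VPSRL*Masked forms. A minimal scalar
// sketch of that convention:
//
//	var s int16 = -8      // arithmetic: s >> 2 == -2     (sign bit replicated)
//	var u uint16 = 0xFFF8 // logical:    u >> 2 == 0x3FFE (zero-filled)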
addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64) @@ -1298,23 +1302,39 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, 
"Int64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x8, types.TypeVec512), sys.AMD64) @@ -1354,22 +1374,24 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, 
"Uint64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightSignExtended", opLen2(ssa.OpShiftAllRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightSignExtendedMasked", opLen3(ssa.OpShiftAllRightSignExtendedMaskedInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64) @@ -1514,42 +1536,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightSignExtended", opLen2(ssa.OpShiftRightSignExtendedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, 
"Int64x2.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightSignExtendedMasked", opLen3(ssa.OpShiftRightSignExtendedMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index e98aca1abf..38ccfaac8c 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -6883,6 +6883,11 @@ func (x Int16x8) ShiftAllLeft(y uint64) Int16x8 // Asm: VPSLLW, CPU Feature: AVX2 func (x Int16x16) ShiftAllLeft(y uint64) Int16x16 +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllLeft(y uint64) Int16x32 + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX @@ -6893,6 +6898,11 @@ func (x Int32x4) ShiftAllLeft(y uint64) Int32x4 // Asm: VPSLLD, CPU Feature: AVX2 func (x Int32x8) ShiftAllLeft(y uint64) Int32x8 +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllLeft(y uint64) Int32x16 + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX @@ -6918,6 +6928,11 @@ func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8 // Asm: VPSLLW, CPU Feature: AVX2 func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16 +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
+// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32 + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLD, CPU Feature: AVX @@ -6928,6 +6943,11 @@ func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4 // Asm: VPSLLD, CPU Feature: AVX2 func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8 +// ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16 + // ShiftAllLeft shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX @@ -7237,6 +7257,36 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, z /* ShiftAllLeftMasked */ +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Int16x8 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Int16x16 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Int16x32 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Int32x4 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Int32x8 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Int32x16 + // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512EVEX @@ -7252,6 +7302,36 @@ func (x Int64x4) ShiftAllLeftMasked(y uint64, z Mask64x4) Int64x4 // Asm: VPSLLQ, CPU Feature: AVX512EVEX func (x Int64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Int64x8 +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllLeftMasked(y uint64, z Mask16x8) Uint16x8 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllLeftMasked(y uint64, z Mask16x16) Uint16x16 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllLeftMasked(y uint64, z Mask16x32) Uint16x32 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. 
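+// For example, a lane holding 0x00FF shifted left by 8 becomes 0xFF00.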
+// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllLeftMasked(y uint64, z Mask32x4) Uint32x4 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllLeftMasked(y uint64, z Mask32x8) Uint32x8 + +// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllLeftMasked(y uint64, z Mask32x16) Uint32x16 + // ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed. // // Asm: VPSLLQ, CPU Feature: AVX512EVEX @@ -7269,39 +7349,49 @@ func (x Uint64x8) ShiftAllLeftMasked(y uint64, z Mask64x8) Uint64x8 /* ShiftAllRight */ -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLW, CPU Feature: AVX +// Asm: VPSRAW, CPU Feature: AVX func (x Int16x8) ShiftAllRight(y uint64) Int16x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLW, CPU Feature: AVX2 +// Asm: VPSRAW, CPU Feature: AVX2 func (x Int16x16) ShiftAllRight(y uint64) Int16x16 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLD, CPU Feature: AVX +// Asm: VPSRAW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllRight(y uint64) Int16x32 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX func (x Int32x4) ShiftAllRight(y uint64) Int32x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLD, CPU Feature: AVX2 +// Asm: VPSRAD, CPU Feature: AVX2 func (x Int32x8) ShiftAllRight(y uint64) Int32x8 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX +// Asm: VPSRAD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllRight(y uint64) Int32x16 + +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX func (x Int64x2) ShiftAllRight(y uint64) Int64x2 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
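+// For example, a lane holding -8 shifted right by 2 yields -2.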
// -// Asm: VPSRLQ, CPU Feature: AVX2 +// Asm: VPSRAQ, CPU Feature: AVX512EVEX func (x Int64x4) ShiftAllRight(y uint64) Int64x4 -// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX +// Asm: VPSRAQ, CPU Feature: AVX512EVEX func (x Int64x8) ShiftAllRight(y uint64) Int64x8 // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. @@ -7314,6 +7404,11 @@ func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8 // Asm: VPSRLW, CPU Feature: AVX2 func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16 +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32 + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLD, CPU Feature: AVX @@ -7324,6 +7419,11 @@ func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4 // Asm: VPSRLD, CPU Feature: AVX2 func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 +// ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16 + // ShiftAllRight shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // // Asm: VPSRLQ, CPU Feature: AVX @@ -7633,89 +7733,95 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, z /* ShiftAllRightMasked */ -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2 +// Asm: VPSRAW, CPU Feature: AVX512EVEX +func (x Int16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Int16x8 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4 +// Asm: VPSRAW, CPU Feature: AVX512EVEX +func (x Int16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Int16x16 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8 +// Asm: VPSRAW, CPU Feature: AVX512EVEX +func (x Int16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Int16x32 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. 
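+// For example, a lane holding -64 shifted right by 3 yields -8.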
// -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2 +// Asm: VPSRAD, CPU Feature: AVX512EVEX +func (x Int32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Int32x4 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4 +// Asm: VPSRAD, CPU Feature: AVX512EVEX +func (x Int32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Int32x8 -// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLQ, CPU Feature: AVX512EVEX -func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8 +// Asm: VPSRAD, CPU Feature: AVX512EVEX +func (x Int32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Int32x16 -/* ShiftAllRightSignExtended */ +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Int64x2 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRAW, CPU Feature: AVX -func (x Int16x8) ShiftAllRightSignExtended(y uint64) Int16x8 +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Int64x4 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRAW, CPU Feature: AVX2 -func (x Int16x16) ShiftAllRightSignExtended(y uint64) Int16x16 +// Asm: VPSRAQ, CPU Feature: AVX512EVEX +func (x Int64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Int64x8 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAD, CPU Feature: AVX -func (x Int32x4) ShiftAllRightSignExtended(y uint64) Int32x4 +// Asm: VPSRLW, CPU Feature: AVX512EVEX +func (x Uint16x8) ShiftAllRightMasked(y uint64, z Mask16x8) Uint16x8 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAD, CPU Feature: AVX2 -func (x Int32x8) ShiftAllRightSignExtended(y uint64) Int32x8 +// Asm: VPSRLW, CPU Feature: AVX512EVEX +func (x Uint16x16) ShiftAllRightMasked(y uint64, z Mask16x16) Uint16x16 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. 
Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x2) ShiftAllRightSignExtended(y uint64) Int64x2 +// Asm: VPSRLW, CPU Feature: AVX512EVEX +func (x Uint16x32) ShiftAllRightMasked(y uint64, z Mask16x32) Uint16x32 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x4) ShiftAllRightSignExtended(y uint64) Int64x4 +// Asm: VPSRLD, CPU Feature: AVX512EVEX +func (x Uint32x4) ShiftAllRightMasked(y uint64, z Mask32x4) Uint32x4 -// ShiftAllRightSignExtended shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x8) ShiftAllRightSignExtended(y uint64) Int64x8 +// Asm: VPSRLD, CPU Feature: AVX512EVEX +func (x Uint32x8) ShiftAllRightMasked(y uint64, z Mask32x8) Uint32x8 -/* ShiftAllRightSignExtendedMasked */ +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX512EVEX +func (x Uint32x16) ShiftAllRightMasked(y uint64, z Mask32x16) Uint32x16 -// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x2) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x2) Int64x2 +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x2) ShiftAllRightMasked(y uint64, z Mask64x2) Uint64x2 -// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x4) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x4) Int64x4 +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x4) ShiftAllRightMasked(y uint64, z Mask64x4) Uint64x4 -// ShiftAllRightSignExtendedMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit. +// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed. // -// Asm: VPSRAQ, CPU Feature: AVX512EVEX -func (x Int64x8) ShiftAllRightSignExtendedMasked(y uint64, z Mask64x8) Int64x8 +// Asm: VPSRLQ, CPU Feature: AVX512EVEX +func (x Uint64x8) ShiftAllRightMasked(y uint64, z Mask64x8) Uint64x8 /* ShiftLeft */ @@ -8123,49 +8229,49 @@ func (x Uint64x8) ShiftLeftMasked(y Uint64x8, z Mask64x8) Uint64x8 /* ShiftRight */ -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVW, CPU Feature: AVX512EVEX +// Asm: VPSRAVW, CPU Feature: AVX512EVEX func (x Int16x8) ShiftRight(y Int16x8) Int16x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVW, CPU Feature: AVX512EVEX +// Asm: VPSRAVW, CPU Feature: AVX512EVEX func (x Int16x16) ShiftRight(y Int16x16) Int16x16 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVW, CPU Feature: AVX512EVEX +// Asm: VPSRAVW, CPU Feature: AVX512EVEX func (x Int16x32) ShiftRight(y Int16x32) Int16x32 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVD, CPU Feature: AVX2 +// Asm: VPSRAVD, CPU Feature: AVX2 func (x Int32x4) ShiftRight(y Int32x4) Int32x4 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVD, CPU Feature: AVX2 +// Asm: VPSRAVD, CPU Feature: AVX2 func (x Int32x8) ShiftRight(y Int32x8) Int32x8 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVD, CPU Feature: AVX512EVEX +// Asm: VPSRAVD, CPU Feature: AVX512EVEX func (x Int32x16) ShiftRight(y Int32x16) Int32x16 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVQ, CPU Feature: AVX2 +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX func (x Int64x2) ShiftRight(y Int64x2) Int64x2 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. // -// Asm: VPSRLVQ, CPU Feature: AVX2 +// Asm: VPSRAVQ, CPU Feature: AVX512EVEX func (x Int64x4) ShiftRight(y Int64x4) Int64x4 -// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x8) ShiftRight(y Int64x8) Int64x8
 
 // ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
@@ -8435,49 +8541,49 @@ func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, u Mas
 
 /* ShiftRightMasked */
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x8) ShiftRightMasked(y Int16x8, z Mask16x8) Int16x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x16) ShiftRightMasked(y Int16x16, z Mask16x16) Int16x16
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512EVEX
+// Asm: VPSRAVW, CPU Feature: AVX512EVEX
 func (x Int16x32) ShiftRightMasked(y Int16x32, z Mask16x32) Int16x32
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x4) ShiftRightMasked(y Int32x4, z Mask32x4) Int32x4
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x8) ShiftRightMasked(y Int32x8, z Mask32x8) Int32x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512EVEX
+// Asm: VPSRAVD, CPU Feature: AVX512EVEX
 func (x Int32x16) ShiftRightMasked(y Int32x16, z Mask32x16) Int32x16
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x2) ShiftRightMasked(y Int64x2, z Mask64x2) Int64x2
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x4) ShiftRightMasked(y Int64x4, z Mask64x4) Int64x4
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512EVEX
+// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
 func (x Int64x8) ShiftRightMasked(y Int64x8, z Mask64x8) Int64x8
 
 // ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
@@ -8525,190 +8631,6 @@ func (x Uint64x4) ShiftRightMasked(y Uint64x4, z Mask64x4) Uint64x4
 // Asm: VPSRLVQ, CPU Feature: AVX512EVEX
 func (x Uint64x8) ShiftRightMasked(y Uint64x8, z Mask64x8) Uint64x8
 
-/* ShiftRightSignExtended */
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x8) ShiftRightSignExtended(y Int16x8) Int16x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x16) ShiftRightSignExtended(y Int16x16) Int16x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x32) ShiftRightSignExtended(y Int16x32) Int16x32
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Int32x4) ShiftRightSignExtended(y Int32x4) Int32x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Int32x8) ShiftRightSignExtended(y Int32x8) Int32x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x16) ShiftRightSignExtended(y Int32x16) Int32x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x2) ShiftRightSignExtended(y Int64x2) Int64x2
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x4) ShiftRightSignExtended(y Int64x4) Int64x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftRightSignExtended(y Int64x8) Int64x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x8) ShiftRightSignExtended(y Uint16x8) Uint16x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x16) ShiftRightSignExtended(y Uint16x16) Uint16x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x32) ShiftRightSignExtended(y Uint16x32) Uint16x32
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Uint32x4) ShiftRightSignExtended(y Uint32x4) Uint32x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX2
-func (x Uint32x8) ShiftRightSignExtended(y Uint32x8) Uint32x8
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x16) ShiftRightSignExtended(y Uint32x16) Uint32x16
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) ShiftRightSignExtended(y Uint64x2) Uint64x2
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) ShiftRightSignExtended(y Uint64x4) Uint64x4
-
-// ShiftRightSignExtended shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) ShiftRightSignExtended(y Uint64x8) Uint64x8
-
-/* ShiftRightSignExtendedMasked */
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x8) ShiftRightSignExtendedMasked(y Int16x8, z Mask16x8) Int16x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x16) ShiftRightSignExtendedMasked(y Int16x16, z Mask16x16) Int16x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Int16x32) ShiftRightSignExtendedMasked(y Int16x32, z Mask16x32) Int16x32
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x4) ShiftRightSignExtendedMasked(y Int32x4, z Mask32x4) Int32x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x8) ShiftRightSignExtendedMasked(y Int32x8, z Mask32x8) Int32x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Int32x16) ShiftRightSignExtendedMasked(y Int32x16, z Mask32x16) Int32x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x2) ShiftRightSignExtendedMasked(y Int64x2, z Mask64x2) Int64x2
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x4) ShiftRightSignExtendedMasked(y Int64x4, z Mask64x4) Int64x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Int64x8) ShiftRightSignExtendedMasked(y Int64x8, z Mask64x8) Int64x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x8) ShiftRightSignExtendedMasked(y Uint16x8, z Mask16x8) Uint16x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x16) ShiftRightSignExtendedMasked(y Uint16x16, z Mask16x16) Uint16x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512EVEX
-func (x Uint16x32) ShiftRightSignExtendedMasked(y Uint16x32, z Mask16x32) Uint16x32
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x4) ShiftRightSignExtendedMasked(y Uint32x4, z Mask32x4) Uint32x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x8) ShiftRightSignExtendedMasked(y Uint32x8, z Mask32x8) Uint32x8
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512EVEX
-func (x Uint32x16) ShiftRightSignExtendedMasked(y Uint32x16, z Mask32x16) Uint32x16
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) ShiftRightSignExtendedMasked(y Uint64x2, z Mask64x2) Uint64x2
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) ShiftRightSignExtendedMasked(y Uint64x4, z Mask64x4) Uint64x4
-
-// ShiftRightSignExtendedMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512EVEX
-func (x Uint64x8) ShiftRightSignExtendedMasked(y Uint64x8, z Mask64x8) Uint64x8
-
 /* Sign */
 
 // Sign returns the product of the first operand with -1, 0, or 1,
diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go
index 62096a76cf..15e5c45097 100644
--- a/src/simd/simd_wrapped_test.go
+++ b/src/simd/simd_wrapped_test.go
@@ -2055,8 +2055,6 @@ func testInt16x8Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sign":
 			gotv = vec0.Sign(vec1)
 		case "Sub":
@@ -2101,8 +2099,6 @@ func testInt16x8BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x8())
 
@@ -2356,8 +2352,6 @@ func testInt16x16Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sign":
 			gotv = vec0.Sign(vec1)
 		case "Sub":
@@ -2402,8 +2396,6 @@ func testInt16x16BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16,
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x16())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x16())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x16())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x16())
 
@@ -2643,8 +2635,6 @@ func testInt16x32Binary(t *testing.T, v0 []int16, v1 []int16, want []int16, whic
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 
@@ -2685,8 +2675,6 @@ func testInt16x32BinaryMasked(t *testing.T, v0 []int16, v1 []int16, v2 []int16,
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x32())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x32())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x32())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x32())
 
@@ -2934,8 +2922,6 @@ func testInt32x4Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sign":
 			gotv = vec0.Sign(vec1)
 		case "Sub":
@@ -2984,8 +2970,6 @@ func testInt32x4BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x4())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x4())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x4())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x4())
 		case "XorMasked":
@@ -3311,8 +3295,6 @@ func testInt32x8Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sign":
 			gotv = vec0.Sign(vec1)
 		case "Sub":
@@ -3361,8 +3343,6 @@ func testInt32x8BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x8())
 		case "XorMasked":
@@ -3684,8 +3664,6 @@ func testInt32x16Binary(t *testing.T, v0 []int32, v1 []int32, want []int32, whic
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -3732,8 +3710,6 @@ func testInt32x16BinaryMasked(t *testing.T, v0 []int32, v1 []int32, v2 []int32,
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x16())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x16())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x16())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x16())
 		case "XorMasked":
@@ -4036,8 +4012,6 @@ func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -4086,8 +4060,6 @@ func testInt64x2BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x2())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x2())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x2())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x2())
 		case "XorMasked":
@@ -4292,8 +4264,6 @@ func testInt64x4Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -4342,8 +4312,6 @@ func testInt64x4BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x4())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x4())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x4())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x4())
 		case "XorMasked":
@@ -4548,8 +4516,6 @@ func testInt64x8Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -4598,8 +4564,6 @@ func testInt64x8BinaryMasked(t *testing.T, v0 []int64, v1 []int64, v2 []int64, w
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x8())
 		case "XorMasked":
@@ -5478,8 +5442,6 @@ func testUint16x8Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -5522,8 +5484,6 @@ func testUint16x8BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int16
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x8())
 
@@ -5726,8 +5686,6 @@ func testUint16x16Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16,
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -5770,8 +5728,6 @@ func testUint16x16BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x16())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x16())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x16())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x16())
 
@@ -5964,8 +5920,6 @@ func testUint16x32Binary(t *testing.T, v0 []uint16, v1 []uint16, want []uint16,
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 
@@ -6006,8 +5960,6 @@ func testUint16x32BinaryMasked(t *testing.T, v0 []uint16, v1 []uint16, v2 []int1
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask16x32())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask16x32())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask16x32())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask16x32())
 
@@ -6206,8 +6158,6 @@ func testUint32x4Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -6252,8 +6202,6 @@ func testUint32x4BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x4())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x4())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x4())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x4())
 		case "XorMasked":
@@ -6524,8 +6472,6 @@ func testUint32x8Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -6570,8 +6516,6 @@ func testUint32x8BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int32
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x8())
 		case "XorMasked":
@@ -6838,8 +6782,6 @@ func testUint32x16Binary(t *testing.T, v0 []uint32, v1 []uint32, want []uint32,
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -6884,8 +6826,6 @@ func testUint32x16BinaryMasked(t *testing.T, v0 []uint32, v1 []uint32, v2 []int3
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask32x16())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask32x16())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask32x16())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask32x16())
 		case "XorMasked":
@@ -7133,8 +7073,6 @@ func testUint64x2Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -7181,8 +7119,6 @@ func testUint64x2BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x2())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x2())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x2())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x2())
 		case "XorMasked":
@@ -7381,8 +7317,6 @@ func testUint64x4Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -7429,8 +7363,6 @@ func testUint64x4BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x4())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x4())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x4())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x4())
 		case "XorMasked":
@@ -7629,8 +7561,6 @@ func testUint64x8Binary(t *testing.T, v0 []uint64, v1 []uint64, want []uint64, w
 			gotv = vec0.ShiftLeft(vec1)
 		case "ShiftRight":
 			gotv = vec0.ShiftRight(vec1)
-		case "ShiftRightSignExtended":
-			gotv = vec0.ShiftRightSignExtended(vec1)
 		case "Sub":
 			gotv = vec0.Sub(vec1)
 		case "Xor":
@@ -7677,8 +7607,6 @@ func testUint64x8BinaryMasked(t *testing.T, v0 []uint64, v1 []uint64, v2 []int64
 			gotv = vec0.ShiftLeftMasked(vec1, vec2.AsMask64x8())
 		case "ShiftRightMasked":
 			gotv = vec0.ShiftRightMasked(vec1, vec2.AsMask64x8())
-		case "ShiftRightSignExtendedMasked":
-			gotv = vec0.ShiftRightSignExtendedMasked(vec1, vec2.AsMask64x8())
 		case "SubMasked":
 			gotv = vec0.SubMasked(vec1, vec2.AsMask64x8())
 		case "XorMasked":
@@ -7884,7 +7812,5 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
 // ShiftAllRightAndFillUpperFrom
 // ShiftAllRightAndFillUpperFromMasked
 // ShiftAllRightMasked
-// ShiftAllRightSignExtended
-// ShiftAllRightSignExtendedMasked
 // TruncWithPrecision
 // TruncWithPrecisionMasked
-- 
2.52.0