From: Junyang Shao
Date: Thu, 31 Jul 2025 23:45:09 +0000 (+0000)
Subject: [dev.simd] cmd/compile, simd: change Shift*AndFillUpperFrom to Shift*Concat
X-Git-Tag: go1.26rc1~147^2~153
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=2c25f3e846;p=gostls13.git

[dev.simd] cmd/compile, simd: change Shift*AndFillUpperFrom to Shift*Concat

This CL is generated by CL 692216.

Change-Id: Ib7530142bcce2a23f90d48866271994c57561955
Reviewed-on: https://go-review.googlesource.com/c/go/+/692215
Reviewed-by: David Chase
LUCI-TryBot-Result: Go LUCI
---

diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index e5e3fb0d50..38b602f35b 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -1401,42 +1401,42 @@
 (ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...)
 (ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...)
 (ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...)
-(ShiftAllLeftAndFillUpperFromInt16x8 ...) => (VPSHLDW128 ...)
-(ShiftAllLeftAndFillUpperFromInt16x16 ...) => (VPSHLDW256 ...)
-(ShiftAllLeftAndFillUpperFromInt16x32 ...) => (VPSHLDW512 ...)
-(ShiftAllLeftAndFillUpperFromInt32x4 ...) => (VPSHLDD128 ...)
-(ShiftAllLeftAndFillUpperFromInt32x8 ...) => (VPSHLDD256 ...)
-(ShiftAllLeftAndFillUpperFromInt32x16 ...) => (VPSHLDD512 ...)
-(ShiftAllLeftAndFillUpperFromInt64x2 ...) => (VPSHLDQ128 ...)
-(ShiftAllLeftAndFillUpperFromInt64x4 ...) => (VPSHLDQ256 ...)
-(ShiftAllLeftAndFillUpperFromInt64x8 ...) => (VPSHLDQ512 ...)
-(ShiftAllLeftAndFillUpperFromUint16x8 ...) => (VPSHLDW128 ...)
-(ShiftAllLeftAndFillUpperFromUint16x16 ...) => (VPSHLDW256 ...)
-(ShiftAllLeftAndFillUpperFromUint16x32 ...) => (VPSHLDW512 ...)
-(ShiftAllLeftAndFillUpperFromUint32x4 ...) => (VPSHLDD128 ...)
-(ShiftAllLeftAndFillUpperFromUint32x8 ...) => (VPSHLDD256 ...)
-(ShiftAllLeftAndFillUpperFromUint32x16 ...) => (VPSHLDD512 ...)
-(ShiftAllLeftAndFillUpperFromUint64x2 ...) => (VPSHLDQ128 ...)
-(ShiftAllLeftAndFillUpperFromUint64x4 ...) => (VPSHLDQ256 ...)
-(ShiftAllLeftAndFillUpperFromUint64x8 ...) => (VPSHLDQ512 ...)
-(ShiftAllLeftAndFillUpperFromMaskedInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftConcatInt16x8 ...) => (VPSHLDW128 ...) +(ShiftAllLeftConcatInt16x16 ...) => (VPSHLDW256 ...) +(ShiftAllLeftConcatInt16x32 ...) => (VPSHLDW512 ...) +(ShiftAllLeftConcatInt32x4 ...) => (VPSHLDD128 ...) +(ShiftAllLeftConcatInt32x8 ...) => (VPSHLDD256 ...) +(ShiftAllLeftConcatInt32x16 ...) => (VPSHLDD512 ...) +(ShiftAllLeftConcatInt64x2 ...) => (VPSHLDQ128 ...) +(ShiftAllLeftConcatInt64x4 ...) => (VPSHLDQ256 ...) +(ShiftAllLeftConcatInt64x8 ...) => (VPSHLDQ512 ...) +(ShiftAllLeftConcatUint16x8 ...) => (VPSHLDW128 ...) +(ShiftAllLeftConcatUint16x16 ...) => (VPSHLDW256 ...) +(ShiftAllLeftConcatUint16x32 ...) => (VPSHLDW512 ...) +(ShiftAllLeftConcatUint32x4 ...) => (VPSHLDD128 ...) +(ShiftAllLeftConcatUint32x8 ...) => (VPSHLDD256 ...) +(ShiftAllLeftConcatUint32x16 ...) => (VPSHLDD512 ...) +(ShiftAllLeftConcatUint64x2 ...) => (VPSHLDQ128 ...) +(ShiftAllLeftConcatUint64x4 ...) => (VPSHLDQ256 ...) +(ShiftAllLeftConcatUint64x8 ...) => (VPSHLDQ512 ...) 
+(ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) (ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) (ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) (ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) @@ -1473,42 +1473,42 @@ (ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) (ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) (ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) -(ShiftAllRightAndFillUpperFromInt16x8 ...) => (VPSHRDW128 ...) -(ShiftAllRightAndFillUpperFromInt16x16 ...) => (VPSHRDW256 ...) -(ShiftAllRightAndFillUpperFromInt16x32 ...) => (VPSHRDW512 ...) -(ShiftAllRightAndFillUpperFromInt32x4 ...) => (VPSHRDD128 ...) -(ShiftAllRightAndFillUpperFromInt32x8 ...) => (VPSHRDD256 ...) -(ShiftAllRightAndFillUpperFromInt32x16 ...) => (VPSHRDD512 ...) -(ShiftAllRightAndFillUpperFromInt64x2 ...) => (VPSHRDQ128 ...) -(ShiftAllRightAndFillUpperFromInt64x4 ...) => (VPSHRDQ256 ...) -(ShiftAllRightAndFillUpperFromInt64x8 ...) => (VPSHRDQ512 ...) -(ShiftAllRightAndFillUpperFromUint16x8 ...) => (VPSHRDW128 ...) -(ShiftAllRightAndFillUpperFromUint16x16 ...) => (VPSHRDW256 ...) -(ShiftAllRightAndFillUpperFromUint16x32 ...) => (VPSHRDW512 ...) -(ShiftAllRightAndFillUpperFromUint32x4 ...) => (VPSHRDD128 ...) -(ShiftAllRightAndFillUpperFromUint32x8 ...) => (VPSHRDD256 ...) -(ShiftAllRightAndFillUpperFromUint32x16 ...) => (VPSHRDD512 ...) -(ShiftAllRightAndFillUpperFromUint64x2 ...) => (VPSHRDQ128 ...) -(ShiftAllRightAndFillUpperFromUint64x4 ...) => (VPSHRDQ256 ...) -(ShiftAllRightAndFillUpperFromUint64x8 ...) => (VPSHRDQ512 ...) 
-(ShiftAllRightAndFillUpperFromMaskedInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightAndFillUpperFromMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...) +(ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...) +(ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...) +(ShiftAllRightConcatInt32x4 ...) => (VPSHRDD128 ...) +(ShiftAllRightConcatInt32x8 ...) => (VPSHRDD256 ...) +(ShiftAllRightConcatInt32x16 ...) => (VPSHRDD512 ...) +(ShiftAllRightConcatInt64x2 ...) => (VPSHRDQ128 ...) +(ShiftAllRightConcatInt64x4 ...) => (VPSHRDQ256 ...) +(ShiftAllRightConcatInt64x8 ...) => (VPSHRDQ512 ...) +(ShiftAllRightConcatUint16x8 ...) => (VPSHRDW128 ...) +(ShiftAllRightConcatUint16x16 ...) => (VPSHRDW256 ...) +(ShiftAllRightConcatUint16x32 ...) => (VPSHRDW512 ...) +(ShiftAllRightConcatUint32x4 ...) => (VPSHRDD128 ...) +(ShiftAllRightConcatUint32x8 ...) => (VPSHRDD256 ...) +(ShiftAllRightConcatUint32x16 ...) => (VPSHRDD512 ...) +(ShiftAllRightConcatUint64x2 ...) => (VPSHRDQ128 ...) +(ShiftAllRightConcatUint64x4 ...) => (VPSHRDQ256 ...) +(ShiftAllRightConcatUint64x8 ...) => (VPSHRDQ512 ...) 
+(ShiftAllRightConcatMaskedInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightConcatMaskedInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightConcatMaskedInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightConcatMaskedInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightConcatMaskedInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightConcatMaskedInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(ShiftAllRightConcatMaskedInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightConcatMaskedInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightConcatMaskedInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightConcatMaskedUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightConcatMaskedUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightConcatMaskedUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightConcatMaskedUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightConcatMaskedUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightConcatMaskedUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) +(ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) (ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) (ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) (ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) @@ -1545,42 +1545,42 @@ (ShiftLeftUint64x2 ...) => (VPSLLVQ128 ...) (ShiftLeftUint64x4 ...) => (VPSLLVQ256 ...) (ShiftLeftUint64x8 ...) => (VPSLLVQ512 ...) -(ShiftLeftAndFillUpperFromInt16x8 ...) => (VPSHLDVW128 ...) -(ShiftLeftAndFillUpperFromInt16x16 ...) => (VPSHLDVW256 ...) -(ShiftLeftAndFillUpperFromInt16x32 ...) => (VPSHLDVW512 ...) -(ShiftLeftAndFillUpperFromInt32x4 ...) => (VPSHLDVD128 ...) -(ShiftLeftAndFillUpperFromInt32x8 ...) => (VPSHLDVD256 ...) -(ShiftLeftAndFillUpperFromInt32x16 ...) => (VPSHLDVD512 ...) -(ShiftLeftAndFillUpperFromInt64x2 ...) => (VPSHLDVQ128 ...) -(ShiftLeftAndFillUpperFromInt64x4 ...) => (VPSHLDVQ256 ...) -(ShiftLeftAndFillUpperFromInt64x8 ...) => (VPSHLDVQ512 ...) -(ShiftLeftAndFillUpperFromUint16x8 ...) => (VPSHLDVW128 ...) -(ShiftLeftAndFillUpperFromUint16x16 ...) => (VPSHLDVW256 ...) -(ShiftLeftAndFillUpperFromUint16x32 ...) => (VPSHLDVW512 ...) -(ShiftLeftAndFillUpperFromUint32x4 ...) => (VPSHLDVD128 ...) -(ShiftLeftAndFillUpperFromUint32x8 ...) => (VPSHLDVD256 ...) -(ShiftLeftAndFillUpperFromUint32x16 ...) => (VPSHLDVD512 ...) -(ShiftLeftAndFillUpperFromUint64x2 ...) => (VPSHLDVQ128 ...) -(ShiftLeftAndFillUpperFromUint64x4 ...) => (VPSHLDVQ256 ...) -(ShiftLeftAndFillUpperFromUint64x8 ...) => (VPSHLDVQ512 ...) 
-(ShiftLeftAndFillUpperFromMaskedInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftLeftAndFillUpperFromMaskedInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftLeftAndFillUpperFromMaskedInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftLeftAndFillUpperFromMaskedInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftLeftAndFillUpperFromMaskedInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftLeftAndFillUpperFromMaskedInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftLeftAndFillUpperFromMaskedInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftLeftAndFillUpperFromMaskedInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftLeftAndFillUpperFromMaskedInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftLeftAndFillUpperFromMaskedUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(ShiftLeftConcatInt16x8 ...) => (VPSHLDVW128 ...) +(ShiftLeftConcatInt16x16 ...) => (VPSHLDVW256 ...) +(ShiftLeftConcatInt16x32 ...) => (VPSHLDVW512 ...) +(ShiftLeftConcatInt32x4 ...) => (VPSHLDVD128 ...) +(ShiftLeftConcatInt32x8 ...) => (VPSHLDVD256 ...) +(ShiftLeftConcatInt32x16 ...) => (VPSHLDVD512 ...) +(ShiftLeftConcatInt64x2 ...) => (VPSHLDVQ128 ...) +(ShiftLeftConcatInt64x4 ...) => (VPSHLDVQ256 ...) +(ShiftLeftConcatInt64x8 ...) => (VPSHLDVQ512 ...) +(ShiftLeftConcatUint16x8 ...) => (VPSHLDVW128 ...) +(ShiftLeftConcatUint16x16 ...) => (VPSHLDVW256 ...) +(ShiftLeftConcatUint16x32 ...) => (VPSHLDVW512 ...) +(ShiftLeftConcatUint32x4 ...) => (VPSHLDVD128 ...) +(ShiftLeftConcatUint32x8 ...) => (VPSHLDVD256 ...) +(ShiftLeftConcatUint32x16 ...) => (VPSHLDVD512 ...) +(ShiftLeftConcatUint64x2 ...) => (VPSHLDVQ128 ...) +(ShiftLeftConcatUint64x4 ...) => (VPSHLDVQ256 ...) +(ShiftLeftConcatUint64x8 ...) => (VPSHLDVQ512 ...) 
+(ShiftLeftConcatMaskedInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(ShiftLeftConcatMaskedInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(ShiftLeftConcatMaskedInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(ShiftLeftConcatMaskedInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(ShiftLeftConcatMaskedInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(ShiftLeftConcatMaskedInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(ShiftLeftConcatMaskedInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(ShiftLeftConcatMaskedInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(ShiftLeftConcatMaskedInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(ShiftLeftConcatMaskedUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(ShiftLeftConcatMaskedUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(ShiftLeftConcatMaskedUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(ShiftLeftConcatMaskedUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(ShiftLeftConcatMaskedUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(ShiftLeftConcatMaskedUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(ShiftLeftConcatMaskedUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(ShiftLeftConcatMaskedUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(ShiftLeftConcatMaskedUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) (ShiftLeftMaskedInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM mask)) (ShiftLeftMaskedInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM mask)) (ShiftLeftMaskedInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM mask)) @@ -1617,42 +1617,42 @@ (ShiftRightUint64x2 ...) => (VPSRLVQ128 ...) (ShiftRightUint64x4 ...) => (VPSRLVQ256 ...) (ShiftRightUint64x8 ...) => (VPSRLVQ512 ...) -(ShiftRightAndFillUpperFromInt16x8 ...) => (VPSHRDVW128 ...) -(ShiftRightAndFillUpperFromInt16x16 ...) => (VPSHRDVW256 ...) -(ShiftRightAndFillUpperFromInt16x32 ...) => (VPSHRDVW512 ...) -(ShiftRightAndFillUpperFromInt32x4 ...) => (VPSHRDVD128 ...) -(ShiftRightAndFillUpperFromInt32x8 ...) => (VPSHRDVD256 ...) -(ShiftRightAndFillUpperFromInt32x16 ...) => (VPSHRDVD512 ...) -(ShiftRightAndFillUpperFromInt64x2 ...) => (VPSHRDVQ128 ...) -(ShiftRightAndFillUpperFromInt64x4 ...) => (VPSHRDVQ256 ...) -(ShiftRightAndFillUpperFromInt64x8 ...) => (VPSHRDVQ512 ...) -(ShiftRightAndFillUpperFromUint16x8 ...) => (VPSHRDVW128 ...) -(ShiftRightAndFillUpperFromUint16x16 ...) => (VPSHRDVW256 ...) -(ShiftRightAndFillUpperFromUint16x32 ...) => (VPSHRDVW512 ...) -(ShiftRightAndFillUpperFromUint32x4 ...) => (VPSHRDVD128 ...) -(ShiftRightAndFillUpperFromUint32x8 ...) => (VPSHRDVD256 ...) -(ShiftRightAndFillUpperFromUint32x16 ...) => (VPSHRDVD512 ...) -(ShiftRightAndFillUpperFromUint64x2 ...) => (VPSHRDVQ128 ...) -(ShiftRightAndFillUpperFromUint64x4 ...) => (VPSHRDVQ256 ...) -(ShiftRightAndFillUpperFromUint64x8 ...) => (VPSHRDVQ512 ...) 
-(ShiftRightAndFillUpperFromMaskedInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftRightAndFillUpperFromMaskedInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftRightAndFillUpperFromMaskedInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftRightAndFillUpperFromMaskedInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftRightAndFillUpperFromMaskedInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftRightAndFillUpperFromMaskedInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftRightAndFillUpperFromMaskedInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftRightAndFillUpperFromMaskedInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftRightAndFillUpperFromMaskedInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) -(ShiftRightAndFillUpperFromMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(ShiftRightConcatInt16x8 ...) => (VPSHRDVW128 ...) +(ShiftRightConcatInt16x16 ...) => (VPSHRDVW256 ...) +(ShiftRightConcatInt16x32 ...) => (VPSHRDVW512 ...) +(ShiftRightConcatInt32x4 ...) => (VPSHRDVD128 ...) +(ShiftRightConcatInt32x8 ...) => (VPSHRDVD256 ...) +(ShiftRightConcatInt32x16 ...) => (VPSHRDVD512 ...) +(ShiftRightConcatInt64x2 ...) => (VPSHRDVQ128 ...) +(ShiftRightConcatInt64x4 ...) => (VPSHRDVQ256 ...) +(ShiftRightConcatInt64x8 ...) => (VPSHRDVQ512 ...) +(ShiftRightConcatUint16x8 ...) => (VPSHRDVW128 ...) +(ShiftRightConcatUint16x16 ...) => (VPSHRDVW256 ...) +(ShiftRightConcatUint16x32 ...) => (VPSHRDVW512 ...) +(ShiftRightConcatUint32x4 ...) => (VPSHRDVD128 ...) +(ShiftRightConcatUint32x8 ...) => (VPSHRDVD256 ...) +(ShiftRightConcatUint32x16 ...) => (VPSHRDVD512 ...) +(ShiftRightConcatUint64x2 ...) => (VPSHRDVQ128 ...) +(ShiftRightConcatUint64x4 ...) => (VPSHRDVQ256 ...) +(ShiftRightConcatUint64x8 ...) => (VPSHRDVQ512 ...) 
+(ShiftRightConcatMaskedInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(ShiftRightConcatMaskedInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(ShiftRightConcatMaskedInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(ShiftRightConcatMaskedInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(ShiftRightConcatMaskedInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(ShiftRightConcatMaskedInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(ShiftRightConcatMaskedInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(ShiftRightConcatMaskedInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(ShiftRightConcatMaskedInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) +(ShiftRightConcatMaskedUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) +(ShiftRightConcatMaskedUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) +(ShiftRightConcatMaskedUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) +(ShiftRightConcatMaskedUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) +(ShiftRightConcatMaskedUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) +(ShiftRightConcatMaskedUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) +(ShiftRightConcatMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) +(ShiftRightConcatMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) +(ShiftRightConcatMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) (ShiftRightMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM mask)) (ShiftRightMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM mask)) (ShiftRightMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM mask)) diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index f1c1246d24..d681620bc3 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -1278,42 +1278,42 @@ func simdGenericOps() []opData { {name: "ShiftAllRightUint64x2", argLength: 2, commutative: false}, {name: "ShiftAllRightUint64x4", argLength: 2, commutative: false}, {name: "ShiftAllRightUint64x8", argLength: 2, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt16x8", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt32x4", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt32x16", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt64x2", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromInt64x8", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false}, - {name: 
"ShiftLeftAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint64x2", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint64x4", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromMaskedUint64x8", argLength: 4, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint16x8", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint32x4", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint32x16", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint64x2", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false}, - {name: "ShiftLeftAndFillUpperFromUint64x8", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt16x8", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt16x16", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt16x32", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt32x4", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt32x8", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt32x16", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt64x2", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt64x4", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatInt64x8", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatMaskedInt16x8", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedInt16x16", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedInt16x32", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedInt32x4", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedInt32x8", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedInt32x16", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedInt64x2", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedInt64x4", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedInt64x8", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint16x8", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint16x16", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint16x32", argLength: 
4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint32x4", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint32x8", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint32x16", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint64x2", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint64x4", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatMaskedUint64x8", argLength: 4, commutative: false}, + {name: "ShiftLeftConcatUint16x8", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatUint16x16", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatUint16x32", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatUint32x4", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatUint32x8", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatUint32x16", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatUint64x2", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatUint64x4", argLength: 3, commutative: false}, + {name: "ShiftLeftConcatUint64x8", argLength: 3, commutative: false}, {name: "ShiftLeftInt16x8", argLength: 2, commutative: false}, {name: "ShiftLeftInt16x16", argLength: 2, commutative: false}, {name: "ShiftLeftInt16x32", argLength: 2, commutative: false}, @@ -1350,42 +1350,42 @@ func simdGenericOps() []opData { {name: "ShiftLeftUint64x2", argLength: 2, commutative: false}, {name: "ShiftLeftUint64x4", argLength: 2, commutative: false}, {name: "ShiftLeftUint64x8", argLength: 2, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt16x8", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt16x16", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt16x32", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt32x4", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt32x8", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt32x16", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt64x2", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt64x4", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromInt64x8", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt16x8", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt16x16", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt16x32", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt32x4", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt32x8", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt32x16", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt64x2", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt64x4", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedInt64x8", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedUint16x8", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedUint16x16", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedUint16x32", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedUint32x4", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedUint32x8", argLength: 4, commutative: false}, - {name: 
"ShiftRightAndFillUpperFromMaskedUint32x16", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedUint64x2", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedUint64x4", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromMaskedUint64x8", argLength: 4, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint16x8", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint16x16", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint16x32", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint32x4", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint32x8", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint32x16", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint64x2", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint64x4", argLength: 3, commutative: false}, - {name: "ShiftRightAndFillUpperFromUint64x8", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt16x8", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt16x16", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt16x32", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt32x4", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt32x8", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt32x16", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt64x2", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt64x4", argLength: 3, commutative: false}, + {name: "ShiftRightConcatInt64x8", argLength: 3, commutative: false}, + {name: "ShiftRightConcatMaskedInt16x8", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedInt16x16", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedInt16x32", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedInt32x4", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedInt32x8", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedInt32x16", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedInt64x2", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedInt64x4", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedInt64x8", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint16x8", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint16x16", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint16x32", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint32x4", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint32x8", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint32x16", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint64x2", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint64x4", argLength: 4, commutative: false}, + {name: "ShiftRightConcatMaskedUint64x8", argLength: 4, commutative: false}, + {name: "ShiftRightConcatUint16x8", argLength: 3, commutative: false}, + {name: "ShiftRightConcatUint16x16", argLength: 3, commutative: false}, + {name: "ShiftRightConcatUint16x32", argLength: 3, commutative: false}, + {name: "ShiftRightConcatUint32x4", argLength: 3, commutative: false}, + {name: "ShiftRightConcatUint32x8", argLength: 3, commutative: false}, + {name: "ShiftRightConcatUint32x16", argLength: 3, commutative: 
false}, + {name: "ShiftRightConcatUint64x2", argLength: 3, commutative: false}, + {name: "ShiftRightConcatUint64x4", argLength: 3, commutative: false}, + {name: "ShiftRightConcatUint64x8", argLength: 3, commutative: false}, {name: "ShiftRightInt16x8", argLength: 2, commutative: false}, {name: "ShiftRightInt16x16", argLength: 2, commutative: false}, {name: "ShiftRightInt16x32", argLength: 2, commutative: false}, @@ -1722,78 +1722,78 @@ func simdGenericOps() []opData { {name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt16x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt16x32", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt32x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt16x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt16x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt16x32", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt32x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt32x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt32x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt64x2", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt64x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedInt64x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint16x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint16x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint16x32", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint32x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint32x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint32x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint64x2", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint64x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromMaskedUint64x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromUint16x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: 
"ShiftAllLeftAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromUint16x32", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromUint32x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllLeftAndFillUpperFromUint64x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt16x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt16x32", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt32x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt16x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt16x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt16x32", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt32x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt32x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt32x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt64x2", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt64x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedInt64x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint16x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint16x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint16x32", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint32x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint32x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint32x16", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint64x2", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint64x4", argLength: 3, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromMaskedUint64x8", argLength: 3, commutative: false, aux: "Int8"}, - {name: 
"ShiftAllRightAndFillUpperFromUint16x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromUint16x32", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromUint32x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "ShiftAllRightAndFillUpperFromUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt16x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint16x32", argLength: 2, 
commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllLeftConcatUint64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt16x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt32x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatInt64x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatUint16x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatUint16x32", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatUint32x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatUint32x16", argLength: 2, commutative: false, aux: 
"Int8"}, + {name: "ShiftAllRightConcatUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatUint64x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "ShiftAllRightConcatUint64x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index ed0203b639..de4477bc91 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -5764,42 +5764,42 @@ const ( OpShiftAllRightUint64x2 OpShiftAllRightUint64x4 OpShiftAllRightUint64x8 - OpShiftLeftAndFillUpperFromInt16x8 - OpShiftLeftAndFillUpperFromInt16x16 - OpShiftLeftAndFillUpperFromInt16x32 - OpShiftLeftAndFillUpperFromInt32x4 - OpShiftLeftAndFillUpperFromInt32x8 - OpShiftLeftAndFillUpperFromInt32x16 - OpShiftLeftAndFillUpperFromInt64x2 - OpShiftLeftAndFillUpperFromInt64x4 - OpShiftLeftAndFillUpperFromInt64x8 - OpShiftLeftAndFillUpperFromMaskedInt16x8 - OpShiftLeftAndFillUpperFromMaskedInt16x16 - OpShiftLeftAndFillUpperFromMaskedInt16x32 - OpShiftLeftAndFillUpperFromMaskedInt32x4 - OpShiftLeftAndFillUpperFromMaskedInt32x8 - OpShiftLeftAndFillUpperFromMaskedInt32x16 - OpShiftLeftAndFillUpperFromMaskedInt64x2 - OpShiftLeftAndFillUpperFromMaskedInt64x4 - OpShiftLeftAndFillUpperFromMaskedInt64x8 - OpShiftLeftAndFillUpperFromMaskedUint16x8 - OpShiftLeftAndFillUpperFromMaskedUint16x16 - OpShiftLeftAndFillUpperFromMaskedUint16x32 - OpShiftLeftAndFillUpperFromMaskedUint32x4 - OpShiftLeftAndFillUpperFromMaskedUint32x8 - OpShiftLeftAndFillUpperFromMaskedUint32x16 - OpShiftLeftAndFillUpperFromMaskedUint64x2 - OpShiftLeftAndFillUpperFromMaskedUint64x4 - OpShiftLeftAndFillUpperFromMaskedUint64x8 - OpShiftLeftAndFillUpperFromUint16x8 - OpShiftLeftAndFillUpperFromUint16x16 - OpShiftLeftAndFillUpperFromUint16x32 - OpShiftLeftAndFillUpperFromUint32x4 - OpShiftLeftAndFillUpperFromUint32x8 - OpShiftLeftAndFillUpperFromUint32x16 - OpShiftLeftAndFillUpperFromUint64x2 - OpShiftLeftAndFillUpperFromUint64x4 - OpShiftLeftAndFillUpperFromUint64x8 + OpShiftLeftConcatInt16x8 + OpShiftLeftConcatInt16x16 + OpShiftLeftConcatInt16x32 + OpShiftLeftConcatInt32x4 + OpShiftLeftConcatInt32x8 + OpShiftLeftConcatInt32x16 + OpShiftLeftConcatInt64x2 + OpShiftLeftConcatInt64x4 + OpShiftLeftConcatInt64x8 + OpShiftLeftConcatMaskedInt16x8 + OpShiftLeftConcatMaskedInt16x16 + OpShiftLeftConcatMaskedInt16x32 + OpShiftLeftConcatMaskedInt32x4 + OpShiftLeftConcatMaskedInt32x8 + OpShiftLeftConcatMaskedInt32x16 + OpShiftLeftConcatMaskedInt64x2 + OpShiftLeftConcatMaskedInt64x4 + OpShiftLeftConcatMaskedInt64x8 + OpShiftLeftConcatMaskedUint16x8 + OpShiftLeftConcatMaskedUint16x16 + OpShiftLeftConcatMaskedUint16x32 + OpShiftLeftConcatMaskedUint32x4 + OpShiftLeftConcatMaskedUint32x8 + OpShiftLeftConcatMaskedUint32x16 + OpShiftLeftConcatMaskedUint64x2 + OpShiftLeftConcatMaskedUint64x4 + OpShiftLeftConcatMaskedUint64x8 + OpShiftLeftConcatUint16x8 + OpShiftLeftConcatUint16x16 + OpShiftLeftConcatUint16x32 + OpShiftLeftConcatUint32x4 + OpShiftLeftConcatUint32x8 + OpShiftLeftConcatUint32x16 + OpShiftLeftConcatUint64x2 + OpShiftLeftConcatUint64x4 + OpShiftLeftConcatUint64x8 OpShiftLeftInt16x8 OpShiftLeftInt16x16 OpShiftLeftInt16x32 @@ -5836,42 +5836,42 @@ const ( OpShiftLeftUint64x2 
OpShiftLeftUint64x4 OpShiftLeftUint64x8 - OpShiftRightAndFillUpperFromInt16x8 - OpShiftRightAndFillUpperFromInt16x16 - OpShiftRightAndFillUpperFromInt16x32 - OpShiftRightAndFillUpperFromInt32x4 - OpShiftRightAndFillUpperFromInt32x8 - OpShiftRightAndFillUpperFromInt32x16 - OpShiftRightAndFillUpperFromInt64x2 - OpShiftRightAndFillUpperFromInt64x4 - OpShiftRightAndFillUpperFromInt64x8 - OpShiftRightAndFillUpperFromMaskedInt16x8 - OpShiftRightAndFillUpperFromMaskedInt16x16 - OpShiftRightAndFillUpperFromMaskedInt16x32 - OpShiftRightAndFillUpperFromMaskedInt32x4 - OpShiftRightAndFillUpperFromMaskedInt32x8 - OpShiftRightAndFillUpperFromMaskedInt32x16 - OpShiftRightAndFillUpperFromMaskedInt64x2 - OpShiftRightAndFillUpperFromMaskedInt64x4 - OpShiftRightAndFillUpperFromMaskedInt64x8 - OpShiftRightAndFillUpperFromMaskedUint16x8 - OpShiftRightAndFillUpperFromMaskedUint16x16 - OpShiftRightAndFillUpperFromMaskedUint16x32 - OpShiftRightAndFillUpperFromMaskedUint32x4 - OpShiftRightAndFillUpperFromMaskedUint32x8 - OpShiftRightAndFillUpperFromMaskedUint32x16 - OpShiftRightAndFillUpperFromMaskedUint64x2 - OpShiftRightAndFillUpperFromMaskedUint64x4 - OpShiftRightAndFillUpperFromMaskedUint64x8 - OpShiftRightAndFillUpperFromUint16x8 - OpShiftRightAndFillUpperFromUint16x16 - OpShiftRightAndFillUpperFromUint16x32 - OpShiftRightAndFillUpperFromUint32x4 - OpShiftRightAndFillUpperFromUint32x8 - OpShiftRightAndFillUpperFromUint32x16 - OpShiftRightAndFillUpperFromUint64x2 - OpShiftRightAndFillUpperFromUint64x4 - OpShiftRightAndFillUpperFromUint64x8 + OpShiftRightConcatInt16x8 + OpShiftRightConcatInt16x16 + OpShiftRightConcatInt16x32 + OpShiftRightConcatInt32x4 + OpShiftRightConcatInt32x8 + OpShiftRightConcatInt32x16 + OpShiftRightConcatInt64x2 + OpShiftRightConcatInt64x4 + OpShiftRightConcatInt64x8 + OpShiftRightConcatMaskedInt16x8 + OpShiftRightConcatMaskedInt16x16 + OpShiftRightConcatMaskedInt16x32 + OpShiftRightConcatMaskedInt32x4 + OpShiftRightConcatMaskedInt32x8 + OpShiftRightConcatMaskedInt32x16 + OpShiftRightConcatMaskedInt64x2 + OpShiftRightConcatMaskedInt64x4 + OpShiftRightConcatMaskedInt64x8 + OpShiftRightConcatMaskedUint16x8 + OpShiftRightConcatMaskedUint16x16 + OpShiftRightConcatMaskedUint16x32 + OpShiftRightConcatMaskedUint32x4 + OpShiftRightConcatMaskedUint32x8 + OpShiftRightConcatMaskedUint32x16 + OpShiftRightConcatMaskedUint64x2 + OpShiftRightConcatMaskedUint64x4 + OpShiftRightConcatMaskedUint64x8 + OpShiftRightConcatUint16x8 + OpShiftRightConcatUint16x16 + OpShiftRightConcatUint16x32 + OpShiftRightConcatUint32x4 + OpShiftRightConcatUint32x8 + OpShiftRightConcatUint32x16 + OpShiftRightConcatUint64x2 + OpShiftRightConcatUint64x4 + OpShiftRightConcatUint64x8 OpShiftRightInt16x8 OpShiftRightInt16x16 OpShiftRightInt16x32 @@ -6208,78 +6208,78 @@ const ( OpSetElemUint16x8 OpSetElemUint32x4 OpSetElemUint64x2 - OpShiftAllLeftAndFillUpperFromInt16x8 - OpShiftAllLeftAndFillUpperFromInt16x16 - OpShiftAllLeftAndFillUpperFromInt16x32 - OpShiftAllLeftAndFillUpperFromInt32x4 - OpShiftAllLeftAndFillUpperFromInt32x8 - OpShiftAllLeftAndFillUpperFromInt32x16 - OpShiftAllLeftAndFillUpperFromInt64x2 - OpShiftAllLeftAndFillUpperFromInt64x4 - OpShiftAllLeftAndFillUpperFromInt64x8 - OpShiftAllLeftAndFillUpperFromMaskedInt16x8 - OpShiftAllLeftAndFillUpperFromMaskedInt16x16 - OpShiftAllLeftAndFillUpperFromMaskedInt16x32 - OpShiftAllLeftAndFillUpperFromMaskedInt32x4 - OpShiftAllLeftAndFillUpperFromMaskedInt32x8 - OpShiftAllLeftAndFillUpperFromMaskedInt32x16 - OpShiftAllLeftAndFillUpperFromMaskedInt64x2 - 
OpShiftAllLeftAndFillUpperFromMaskedInt64x4 - OpShiftAllLeftAndFillUpperFromMaskedInt64x8 - OpShiftAllLeftAndFillUpperFromMaskedUint16x8 - OpShiftAllLeftAndFillUpperFromMaskedUint16x16 - OpShiftAllLeftAndFillUpperFromMaskedUint16x32 - OpShiftAllLeftAndFillUpperFromMaskedUint32x4 - OpShiftAllLeftAndFillUpperFromMaskedUint32x8 - OpShiftAllLeftAndFillUpperFromMaskedUint32x16 - OpShiftAllLeftAndFillUpperFromMaskedUint64x2 - OpShiftAllLeftAndFillUpperFromMaskedUint64x4 - OpShiftAllLeftAndFillUpperFromMaskedUint64x8 - OpShiftAllLeftAndFillUpperFromUint16x8 - OpShiftAllLeftAndFillUpperFromUint16x16 - OpShiftAllLeftAndFillUpperFromUint16x32 - OpShiftAllLeftAndFillUpperFromUint32x4 - OpShiftAllLeftAndFillUpperFromUint32x8 - OpShiftAllLeftAndFillUpperFromUint32x16 - OpShiftAllLeftAndFillUpperFromUint64x2 - OpShiftAllLeftAndFillUpperFromUint64x4 - OpShiftAllLeftAndFillUpperFromUint64x8 - OpShiftAllRightAndFillUpperFromInt16x8 - OpShiftAllRightAndFillUpperFromInt16x16 - OpShiftAllRightAndFillUpperFromInt16x32 - OpShiftAllRightAndFillUpperFromInt32x4 - OpShiftAllRightAndFillUpperFromInt32x8 - OpShiftAllRightAndFillUpperFromInt32x16 - OpShiftAllRightAndFillUpperFromInt64x2 - OpShiftAllRightAndFillUpperFromInt64x4 - OpShiftAllRightAndFillUpperFromInt64x8 - OpShiftAllRightAndFillUpperFromMaskedInt16x8 - OpShiftAllRightAndFillUpperFromMaskedInt16x16 - OpShiftAllRightAndFillUpperFromMaskedInt16x32 - OpShiftAllRightAndFillUpperFromMaskedInt32x4 - OpShiftAllRightAndFillUpperFromMaskedInt32x8 - OpShiftAllRightAndFillUpperFromMaskedInt32x16 - OpShiftAllRightAndFillUpperFromMaskedInt64x2 - OpShiftAllRightAndFillUpperFromMaskedInt64x4 - OpShiftAllRightAndFillUpperFromMaskedInt64x8 - OpShiftAllRightAndFillUpperFromMaskedUint16x8 - OpShiftAllRightAndFillUpperFromMaskedUint16x16 - OpShiftAllRightAndFillUpperFromMaskedUint16x32 - OpShiftAllRightAndFillUpperFromMaskedUint32x4 - OpShiftAllRightAndFillUpperFromMaskedUint32x8 - OpShiftAllRightAndFillUpperFromMaskedUint32x16 - OpShiftAllRightAndFillUpperFromMaskedUint64x2 - OpShiftAllRightAndFillUpperFromMaskedUint64x4 - OpShiftAllRightAndFillUpperFromMaskedUint64x8 - OpShiftAllRightAndFillUpperFromUint16x8 - OpShiftAllRightAndFillUpperFromUint16x16 - OpShiftAllRightAndFillUpperFromUint16x32 - OpShiftAllRightAndFillUpperFromUint32x4 - OpShiftAllRightAndFillUpperFromUint32x8 - OpShiftAllRightAndFillUpperFromUint32x16 - OpShiftAllRightAndFillUpperFromUint64x2 - OpShiftAllRightAndFillUpperFromUint64x4 - OpShiftAllRightAndFillUpperFromUint64x8 + OpShiftAllLeftConcatInt16x8 + OpShiftAllLeftConcatInt16x16 + OpShiftAllLeftConcatInt16x32 + OpShiftAllLeftConcatInt32x4 + OpShiftAllLeftConcatInt32x8 + OpShiftAllLeftConcatInt32x16 + OpShiftAllLeftConcatInt64x2 + OpShiftAllLeftConcatInt64x4 + OpShiftAllLeftConcatInt64x8 + OpShiftAllLeftConcatMaskedInt16x8 + OpShiftAllLeftConcatMaskedInt16x16 + OpShiftAllLeftConcatMaskedInt16x32 + OpShiftAllLeftConcatMaskedInt32x4 + OpShiftAllLeftConcatMaskedInt32x8 + OpShiftAllLeftConcatMaskedInt32x16 + OpShiftAllLeftConcatMaskedInt64x2 + OpShiftAllLeftConcatMaskedInt64x4 + OpShiftAllLeftConcatMaskedInt64x8 + OpShiftAllLeftConcatMaskedUint16x8 + OpShiftAllLeftConcatMaskedUint16x16 + OpShiftAllLeftConcatMaskedUint16x32 + OpShiftAllLeftConcatMaskedUint32x4 + OpShiftAllLeftConcatMaskedUint32x8 + OpShiftAllLeftConcatMaskedUint32x16 + OpShiftAllLeftConcatMaskedUint64x2 + OpShiftAllLeftConcatMaskedUint64x4 + OpShiftAllLeftConcatMaskedUint64x8 + OpShiftAllLeftConcatUint16x8 + OpShiftAllLeftConcatUint16x16 + OpShiftAllLeftConcatUint16x32 + 
OpShiftAllLeftConcatUint32x4 + OpShiftAllLeftConcatUint32x8 + OpShiftAllLeftConcatUint32x16 + OpShiftAllLeftConcatUint64x2 + OpShiftAllLeftConcatUint64x4 + OpShiftAllLeftConcatUint64x8 + OpShiftAllRightConcatInt16x8 + OpShiftAllRightConcatInt16x16 + OpShiftAllRightConcatInt16x32 + OpShiftAllRightConcatInt32x4 + OpShiftAllRightConcatInt32x8 + OpShiftAllRightConcatInt32x16 + OpShiftAllRightConcatInt64x2 + OpShiftAllRightConcatInt64x4 + OpShiftAllRightConcatInt64x8 + OpShiftAllRightConcatMaskedInt16x8 + OpShiftAllRightConcatMaskedInt16x16 + OpShiftAllRightConcatMaskedInt16x32 + OpShiftAllRightConcatMaskedInt32x4 + OpShiftAllRightConcatMaskedInt32x8 + OpShiftAllRightConcatMaskedInt32x16 + OpShiftAllRightConcatMaskedInt64x2 + OpShiftAllRightConcatMaskedInt64x4 + OpShiftAllRightConcatMaskedInt64x8 + OpShiftAllRightConcatMaskedUint16x8 + OpShiftAllRightConcatMaskedUint16x16 + OpShiftAllRightConcatMaskedUint16x32 + OpShiftAllRightConcatMaskedUint32x4 + OpShiftAllRightConcatMaskedUint32x8 + OpShiftAllRightConcatMaskedUint32x16 + OpShiftAllRightConcatMaskedUint64x2 + OpShiftAllRightConcatMaskedUint64x4 + OpShiftAllRightConcatMaskedUint64x8 + OpShiftAllRightConcatUint16x8 + OpShiftAllRightConcatUint16x16 + OpShiftAllRightConcatUint16x32 + OpShiftAllRightConcatUint32x4 + OpShiftAllRightConcatUint32x8 + OpShiftAllRightConcatUint32x16 + OpShiftAllRightConcatUint64x2 + OpShiftAllRightConcatUint64x4 + OpShiftAllRightConcatUint64x8 OpTruncWithPrecisionFloat32x4 OpTruncWithPrecisionFloat32x8 OpTruncWithPrecisionFloat32x16 @@ -68518,182 +68518,182 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt16x8", + name: "ShiftLeftConcatInt16x8", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt16x16", + name: "ShiftLeftConcatInt16x16", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt16x32", + name: "ShiftLeftConcatInt16x32", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt32x4", + name: "ShiftLeftConcatInt32x4", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt32x8", + name: "ShiftLeftConcatInt32x8", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt32x16", + name: "ShiftLeftConcatInt32x16", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt64x2", + name: "ShiftLeftConcatInt64x2", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt64x4", + name: "ShiftLeftConcatInt64x4", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromInt64x8", + name: "ShiftLeftConcatInt64x8", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt16x8", + name: "ShiftLeftConcatMaskedInt16x8", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt16x16", + name: "ShiftLeftConcatMaskedInt16x16", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt16x32", + name: "ShiftLeftConcatMaskedInt16x32", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt32x4", + name: "ShiftLeftConcatMaskedInt32x4", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt32x8", + name: "ShiftLeftConcatMaskedInt32x8", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt32x16", + name: "ShiftLeftConcatMaskedInt32x16", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt64x2", + name: "ShiftLeftConcatMaskedInt64x2", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt64x4", + name: "ShiftLeftConcatMaskedInt64x4", argLen: 4, 
generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedInt64x8", + name: "ShiftLeftConcatMaskedInt64x8", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint16x8", + name: "ShiftLeftConcatMaskedUint16x8", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint16x16", + name: "ShiftLeftConcatMaskedUint16x16", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint16x32", + name: "ShiftLeftConcatMaskedUint16x32", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint32x4", + name: "ShiftLeftConcatMaskedUint32x4", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint32x8", + name: "ShiftLeftConcatMaskedUint32x8", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint32x16", + name: "ShiftLeftConcatMaskedUint32x16", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint64x2", + name: "ShiftLeftConcatMaskedUint64x2", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint64x4", + name: "ShiftLeftConcatMaskedUint64x4", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromMaskedUint64x8", + name: "ShiftLeftConcatMaskedUint64x8", argLen: 4, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint16x8", + name: "ShiftLeftConcatUint16x8", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint16x16", + name: "ShiftLeftConcatUint16x16", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint16x32", + name: "ShiftLeftConcatUint16x32", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint32x4", + name: "ShiftLeftConcatUint32x4", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint32x8", + name: "ShiftLeftConcatUint32x8", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint32x16", + name: "ShiftLeftConcatUint32x16", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint64x2", + name: "ShiftLeftConcatUint64x2", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint64x4", + name: "ShiftLeftConcatUint64x4", argLen: 3, generic: true, }, { - name: "ShiftLeftAndFillUpperFromUint64x8", + name: "ShiftLeftConcatUint64x8", argLen: 3, generic: true, }, @@ -68878,182 +68878,182 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftRightAndFillUpperFromInt16x8", + name: "ShiftRightConcatInt16x8", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt16x16", + name: "ShiftRightConcatInt16x16", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt16x32", + name: "ShiftRightConcatInt16x32", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt32x4", + name: "ShiftRightConcatInt32x4", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt32x8", + name: "ShiftRightConcatInt32x8", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt32x16", + name: "ShiftRightConcatInt32x16", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt64x2", + name: "ShiftRightConcatInt64x2", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt64x4", + name: "ShiftRightConcatInt64x4", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromInt64x8", + name: "ShiftRightConcatInt64x8", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt16x8", + name: "ShiftRightConcatMaskedInt16x8", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt16x16", + name: 
"ShiftRightConcatMaskedInt16x16", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt16x32", + name: "ShiftRightConcatMaskedInt16x32", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt32x4", + name: "ShiftRightConcatMaskedInt32x4", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt32x8", + name: "ShiftRightConcatMaskedInt32x8", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt32x16", + name: "ShiftRightConcatMaskedInt32x16", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt64x2", + name: "ShiftRightConcatMaskedInt64x2", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt64x4", + name: "ShiftRightConcatMaskedInt64x4", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedInt64x8", + name: "ShiftRightConcatMaskedInt64x8", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint16x8", + name: "ShiftRightConcatMaskedUint16x8", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint16x16", + name: "ShiftRightConcatMaskedUint16x16", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint16x32", + name: "ShiftRightConcatMaskedUint16x32", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint32x4", + name: "ShiftRightConcatMaskedUint32x4", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint32x8", + name: "ShiftRightConcatMaskedUint32x8", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint32x16", + name: "ShiftRightConcatMaskedUint32x16", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint64x2", + name: "ShiftRightConcatMaskedUint64x2", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint64x4", + name: "ShiftRightConcatMaskedUint64x4", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromMaskedUint64x8", + name: "ShiftRightConcatMaskedUint64x8", argLen: 4, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint16x8", + name: "ShiftRightConcatUint16x8", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint16x16", + name: "ShiftRightConcatUint16x16", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint16x32", + name: "ShiftRightConcatUint16x32", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint32x4", + name: "ShiftRightConcatUint32x4", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint32x8", + name: "ShiftRightConcatUint32x8", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint32x16", + name: "ShiftRightConcatUint32x16", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint64x2", + name: "ShiftRightConcatUint64x2", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint64x4", + name: "ShiftRightConcatUint64x4", argLen: 3, generic: true, }, { - name: "ShiftRightAndFillUpperFromUint64x8", + name: "ShiftRightConcatUint64x8", argLen: 3, generic: true, }, @@ -70950,433 +70950,433 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt16x8", + name: "ShiftAllLeftConcatInt16x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt16x16", + name: "ShiftAllLeftConcatInt16x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt16x32", + name: "ShiftAllLeftConcatInt16x32", auxType: auxInt8, argLen: 2, generic: 
true, }, { - name: "ShiftAllLeftAndFillUpperFromInt32x4", + name: "ShiftAllLeftConcatInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt32x8", + name: "ShiftAllLeftConcatInt32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt32x16", + name: "ShiftAllLeftConcatInt32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt64x2", + name: "ShiftAllLeftConcatInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt64x4", + name: "ShiftAllLeftConcatInt64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromInt64x8", + name: "ShiftAllLeftConcatInt64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt16x8", + name: "ShiftAllLeftConcatMaskedInt16x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt16x16", + name: "ShiftAllLeftConcatMaskedInt16x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt16x32", + name: "ShiftAllLeftConcatMaskedInt16x32", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt32x4", + name: "ShiftAllLeftConcatMaskedInt32x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt32x8", + name: "ShiftAllLeftConcatMaskedInt32x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt32x16", + name: "ShiftAllLeftConcatMaskedInt32x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt64x2", + name: "ShiftAllLeftConcatMaskedInt64x2", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt64x4", + name: "ShiftAllLeftConcatMaskedInt64x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedInt64x8", + name: "ShiftAllLeftConcatMaskedInt64x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint16x8", + name: "ShiftAllLeftConcatMaskedUint16x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint16x16", + name: "ShiftAllLeftConcatMaskedUint16x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint16x32", + name: "ShiftAllLeftConcatMaskedUint16x32", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint32x4", + name: "ShiftAllLeftConcatMaskedUint32x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint32x8", + name: "ShiftAllLeftConcatMaskedUint32x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint32x16", + name: "ShiftAllLeftConcatMaskedUint32x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint64x2", + name: "ShiftAllLeftConcatMaskedUint64x2", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint64x4", + name: "ShiftAllLeftConcatMaskedUint64x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromMaskedUint64x8", + name: "ShiftAllLeftConcatMaskedUint64x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint16x8", + name: "ShiftAllLeftConcatUint16x8", auxType: auxInt8, 
argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint16x16", + name: "ShiftAllLeftConcatUint16x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint16x32", + name: "ShiftAllLeftConcatUint16x32", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint32x4", + name: "ShiftAllLeftConcatUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint32x8", + name: "ShiftAllLeftConcatUint32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint32x16", + name: "ShiftAllLeftConcatUint32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint64x2", + name: "ShiftAllLeftConcatUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint64x4", + name: "ShiftAllLeftConcatUint64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllLeftAndFillUpperFromUint64x8", + name: "ShiftAllLeftConcatUint64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt16x8", + name: "ShiftAllRightConcatInt16x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt16x16", + name: "ShiftAllRightConcatInt16x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt16x32", + name: "ShiftAllRightConcatInt16x32", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt32x4", + name: "ShiftAllRightConcatInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt32x8", + name: "ShiftAllRightConcatInt32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt32x16", + name: "ShiftAllRightConcatInt32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt64x2", + name: "ShiftAllRightConcatInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt64x4", + name: "ShiftAllRightConcatInt64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromInt64x8", + name: "ShiftAllRightConcatInt64x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedInt16x8", + name: "ShiftAllRightConcatMaskedInt16x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedInt16x16", + name: "ShiftAllRightConcatMaskedInt16x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedInt16x32", + name: "ShiftAllRightConcatMaskedInt16x32", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedInt32x4", + name: "ShiftAllRightConcatMaskedInt32x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedInt32x8", + name: "ShiftAllRightConcatMaskedInt32x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedInt32x16", + name: "ShiftAllRightConcatMaskedInt32x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedInt64x2", + name: "ShiftAllRightConcatMaskedInt64x2", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedInt64x4", + name: "ShiftAllRightConcatMaskedInt64x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: 
"ShiftAllRightAndFillUpperFromMaskedInt64x8", + name: "ShiftAllRightConcatMaskedInt64x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint16x8", + name: "ShiftAllRightConcatMaskedUint16x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint16x16", + name: "ShiftAllRightConcatMaskedUint16x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint16x32", + name: "ShiftAllRightConcatMaskedUint16x32", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint32x4", + name: "ShiftAllRightConcatMaskedUint32x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint32x8", + name: "ShiftAllRightConcatMaskedUint32x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint32x16", + name: "ShiftAllRightConcatMaskedUint32x16", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint64x2", + name: "ShiftAllRightConcatMaskedUint64x2", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint64x4", + name: "ShiftAllRightConcatMaskedUint64x4", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromMaskedUint64x8", + name: "ShiftAllRightConcatMaskedUint64x8", auxType: auxInt8, argLen: 3, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint16x8", + name: "ShiftAllRightConcatUint16x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint16x16", + name: "ShiftAllRightConcatUint16x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint16x32", + name: "ShiftAllRightConcatUint16x32", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint32x4", + name: "ShiftAllRightConcatUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint32x8", + name: "ShiftAllRightConcatUint32x8", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint32x16", + name: "ShiftAllRightConcatUint32x16", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint64x2", + name: "ShiftAllRightConcatUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint64x4", + name: "ShiftAllRightConcatUint64x4", auxType: auxInt8, argLen: 2, generic: true, }, { - name: "ShiftAllRightAndFillUpperFromUint64x8", + name: "ShiftAllRightConcatUint64x8", auxType: auxInt8, argLen: 2, generic: true, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 986f256887..e9a2fd70e4 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -4443,94 +4443,94 @@ func rewriteValueAMD64(v *Value) bool { case OpSetElemUint8x16: v.Op = OpAMD64VPINSRB128 return true - case OpShiftAllLeftAndFillUpperFromInt16x16: + case OpShiftAllLeftConcatInt16x16: v.Op = OpAMD64VPSHLDW256 return true - case OpShiftAllLeftAndFillUpperFromInt16x32: + case OpShiftAllLeftConcatInt16x32: v.Op = OpAMD64VPSHLDW512 return true - case OpShiftAllLeftAndFillUpperFromInt16x8: + case OpShiftAllLeftConcatInt16x8: v.Op = OpAMD64VPSHLDW128 return true - case OpShiftAllLeftAndFillUpperFromInt32x16: + case OpShiftAllLeftConcatInt32x16: v.Op = 
OpAMD64VPSHLDD512 return true - case OpShiftAllLeftAndFillUpperFromInt32x4: + case OpShiftAllLeftConcatInt32x4: v.Op = OpAMD64VPSHLDD128 return true - case OpShiftAllLeftAndFillUpperFromInt32x8: + case OpShiftAllLeftConcatInt32x8: v.Op = OpAMD64VPSHLDD256 return true - case OpShiftAllLeftAndFillUpperFromInt64x2: + case OpShiftAllLeftConcatInt64x2: v.Op = OpAMD64VPSHLDQ128 return true - case OpShiftAllLeftAndFillUpperFromInt64x4: + case OpShiftAllLeftConcatInt64x4: v.Op = OpAMD64VPSHLDQ256 return true - case OpShiftAllLeftAndFillUpperFromInt64x8: + case OpShiftAllLeftConcatInt64x8: v.Op = OpAMD64VPSHLDQ512 return true - case OpShiftAllLeftAndFillUpperFromMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x16(v) - case OpShiftAllLeftAndFillUpperFromMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x32(v) - case OpShiftAllLeftAndFillUpperFromMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x8(v) - case OpShiftAllLeftAndFillUpperFromMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x16(v) - case OpShiftAllLeftAndFillUpperFromMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x4(v) - case OpShiftAllLeftAndFillUpperFromMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x8(v) - case OpShiftAllLeftAndFillUpperFromMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x2(v) - case OpShiftAllLeftAndFillUpperFromMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x4(v) - case OpShiftAllLeftAndFillUpperFromMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x8(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x16(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x32(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x8(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x16(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint32x4: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x4(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x8(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x2(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x4(v) - case OpShiftAllLeftAndFillUpperFromMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x8(v) - case OpShiftAllLeftAndFillUpperFromUint16x16: + case OpShiftAllLeftConcatMaskedInt16x16: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v) + case OpShiftAllLeftConcatMaskedInt16x32: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v) + case OpShiftAllLeftConcatMaskedInt16x8: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v) + case OpShiftAllLeftConcatMaskedInt32x16: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v) + case OpShiftAllLeftConcatMaskedInt32x4: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v) + case OpShiftAllLeftConcatMaskedInt32x8: + 
return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v) + case OpShiftAllLeftConcatMaskedInt64x2: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v) + case OpShiftAllLeftConcatMaskedInt64x4: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v) + case OpShiftAllLeftConcatMaskedInt64x8: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v) + case OpShiftAllLeftConcatMaskedUint16x16: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v) + case OpShiftAllLeftConcatMaskedUint16x32: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v) + case OpShiftAllLeftConcatMaskedUint16x8: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v) + case OpShiftAllLeftConcatMaskedUint32x16: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v) + case OpShiftAllLeftConcatMaskedUint32x4: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v) + case OpShiftAllLeftConcatMaskedUint32x8: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v) + case OpShiftAllLeftConcatMaskedUint64x2: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v) + case OpShiftAllLeftConcatMaskedUint64x4: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v) + case OpShiftAllLeftConcatMaskedUint64x8: + return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v) + case OpShiftAllLeftConcatUint16x16: v.Op = OpAMD64VPSHLDW256 return true - case OpShiftAllLeftAndFillUpperFromUint16x32: + case OpShiftAllLeftConcatUint16x32: v.Op = OpAMD64VPSHLDW512 return true - case OpShiftAllLeftAndFillUpperFromUint16x8: + case OpShiftAllLeftConcatUint16x8: v.Op = OpAMD64VPSHLDW128 return true - case OpShiftAllLeftAndFillUpperFromUint32x16: + case OpShiftAllLeftConcatUint32x16: v.Op = OpAMD64VPSHLDD512 return true - case OpShiftAllLeftAndFillUpperFromUint32x4: + case OpShiftAllLeftConcatUint32x4: v.Op = OpAMD64VPSHLDD128 return true - case OpShiftAllLeftAndFillUpperFromUint32x8: + case OpShiftAllLeftConcatUint32x8: v.Op = OpAMD64VPSHLDD256 return true - case OpShiftAllLeftAndFillUpperFromUint64x2: + case OpShiftAllLeftConcatUint64x2: v.Op = OpAMD64VPSHLDQ128 return true - case OpShiftAllLeftAndFillUpperFromUint64x4: + case OpShiftAllLeftConcatUint64x4: v.Op = OpAMD64VPSHLDQ256 return true - case OpShiftAllLeftAndFillUpperFromUint64x8: + case OpShiftAllLeftConcatUint64x8: v.Op = OpAMD64VPSHLDQ512 return true case OpShiftAllLeftInt16x16: @@ -4623,94 +4623,94 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftUint64x8: v.Op = OpAMD64VPSLLQ512 return true - case OpShiftAllRightAndFillUpperFromInt16x16: + case OpShiftAllRightConcatInt16x16: v.Op = OpAMD64VPSHRDW256 return true - case OpShiftAllRightAndFillUpperFromInt16x32: + case OpShiftAllRightConcatInt16x32: v.Op = OpAMD64VPSHRDW512 return true - case OpShiftAllRightAndFillUpperFromInt16x8: + case OpShiftAllRightConcatInt16x8: v.Op = OpAMD64VPSHRDW128 return true - case OpShiftAllRightAndFillUpperFromInt32x16: + case OpShiftAllRightConcatInt32x16: v.Op = OpAMD64VPSHRDD512 return true - case OpShiftAllRightAndFillUpperFromInt32x4: + case OpShiftAllRightConcatInt32x4: v.Op = OpAMD64VPSHRDD128 return true - case OpShiftAllRightAndFillUpperFromInt32x8: + case OpShiftAllRightConcatInt32x8: v.Op = OpAMD64VPSHRDD256 return true - case OpShiftAllRightAndFillUpperFromInt64x2: + case OpShiftAllRightConcatInt64x2: v.Op = OpAMD64VPSHRDQ128 return true - case OpShiftAllRightAndFillUpperFromInt64x4: + case OpShiftAllRightConcatInt64x4: v.Op = OpAMD64VPSHRDQ256 return true - case 
OpShiftAllRightAndFillUpperFromInt64x8: + case OpShiftAllRightConcatInt64x8: v.Op = OpAMD64VPSHRDQ512 return true - case OpShiftAllRightAndFillUpperFromMaskedInt16x16: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x16(v) - case OpShiftAllRightAndFillUpperFromMaskedInt16x32: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x32(v) - case OpShiftAllRightAndFillUpperFromMaskedInt16x8: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x8(v) - case OpShiftAllRightAndFillUpperFromMaskedInt32x16: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x16(v) - case OpShiftAllRightAndFillUpperFromMaskedInt32x4: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x4(v) - case OpShiftAllRightAndFillUpperFromMaskedInt32x8: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x8(v) - case OpShiftAllRightAndFillUpperFromMaskedInt64x2: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x2(v) - case OpShiftAllRightAndFillUpperFromMaskedInt64x4: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x4(v) - case OpShiftAllRightAndFillUpperFromMaskedInt64x8: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x8(v) - case OpShiftAllRightAndFillUpperFromMaskedUint16x16: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x16(v) - case OpShiftAllRightAndFillUpperFromMaskedUint16x32: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x32(v) - case OpShiftAllRightAndFillUpperFromMaskedUint16x8: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x8(v) - case OpShiftAllRightAndFillUpperFromMaskedUint32x16: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x16(v) - case OpShiftAllRightAndFillUpperFromMaskedUint32x4: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x4(v) - case OpShiftAllRightAndFillUpperFromMaskedUint32x8: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x8(v) - case OpShiftAllRightAndFillUpperFromMaskedUint64x2: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x2(v) - case OpShiftAllRightAndFillUpperFromMaskedUint64x4: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x4(v) - case OpShiftAllRightAndFillUpperFromMaskedUint64x8: - return rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x8(v) - case OpShiftAllRightAndFillUpperFromUint16x16: + case OpShiftAllRightConcatMaskedInt16x16: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v) + case OpShiftAllRightConcatMaskedInt16x32: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v) + case OpShiftAllRightConcatMaskedInt16x8: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v) + case OpShiftAllRightConcatMaskedInt32x16: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v) + case OpShiftAllRightConcatMaskedInt32x4: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v) + case OpShiftAllRightConcatMaskedInt32x8: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v) + case OpShiftAllRightConcatMaskedInt64x2: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v) + case OpShiftAllRightConcatMaskedInt64x4: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v) + case OpShiftAllRightConcatMaskedInt64x8: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v) + case OpShiftAllRightConcatMaskedUint16x16: + return 
rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v) + case OpShiftAllRightConcatMaskedUint16x32: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v) + case OpShiftAllRightConcatMaskedUint16x8: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v) + case OpShiftAllRightConcatMaskedUint32x16: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v) + case OpShiftAllRightConcatMaskedUint32x4: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v) + case OpShiftAllRightConcatMaskedUint32x8: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v) + case OpShiftAllRightConcatMaskedUint64x2: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v) + case OpShiftAllRightConcatMaskedUint64x4: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v) + case OpShiftAllRightConcatMaskedUint64x8: + return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v) + case OpShiftAllRightConcatUint16x16: v.Op = OpAMD64VPSHRDW256 return true - case OpShiftAllRightAndFillUpperFromUint16x32: + case OpShiftAllRightConcatUint16x32: v.Op = OpAMD64VPSHRDW512 return true - case OpShiftAllRightAndFillUpperFromUint16x8: + case OpShiftAllRightConcatUint16x8: v.Op = OpAMD64VPSHRDW128 return true - case OpShiftAllRightAndFillUpperFromUint32x16: + case OpShiftAllRightConcatUint32x16: v.Op = OpAMD64VPSHRDD512 return true - case OpShiftAllRightAndFillUpperFromUint32x4: + case OpShiftAllRightConcatUint32x4: v.Op = OpAMD64VPSHRDD128 return true - case OpShiftAllRightAndFillUpperFromUint32x8: + case OpShiftAllRightConcatUint32x8: v.Op = OpAMD64VPSHRDD256 return true - case OpShiftAllRightAndFillUpperFromUint64x2: + case OpShiftAllRightConcatUint64x2: v.Op = OpAMD64VPSHRDQ128 return true - case OpShiftAllRightAndFillUpperFromUint64x4: + case OpShiftAllRightConcatUint64x4: v.Op = OpAMD64VPSHRDQ256 return true - case OpShiftAllRightAndFillUpperFromUint64x8: + case OpShiftAllRightConcatUint64x8: v.Op = OpAMD64VPSHRDQ512 return true case OpShiftAllRightInt16x16: @@ -4803,94 +4803,94 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllRightUint64x8: v.Op = OpAMD64VPSRLQ512 return true - case OpShiftLeftAndFillUpperFromInt16x16: + case OpShiftLeftConcatInt16x16: v.Op = OpAMD64VPSHLDVW256 return true - case OpShiftLeftAndFillUpperFromInt16x32: + case OpShiftLeftConcatInt16x32: v.Op = OpAMD64VPSHLDVW512 return true - case OpShiftLeftAndFillUpperFromInt16x8: + case OpShiftLeftConcatInt16x8: v.Op = OpAMD64VPSHLDVW128 return true - case OpShiftLeftAndFillUpperFromInt32x16: + case OpShiftLeftConcatInt32x16: v.Op = OpAMD64VPSHLDVD512 return true - case OpShiftLeftAndFillUpperFromInt32x4: + case OpShiftLeftConcatInt32x4: v.Op = OpAMD64VPSHLDVD128 return true - case OpShiftLeftAndFillUpperFromInt32x8: + case OpShiftLeftConcatInt32x8: v.Op = OpAMD64VPSHLDVD256 return true - case OpShiftLeftAndFillUpperFromInt64x2: + case OpShiftLeftConcatInt64x2: v.Op = OpAMD64VPSHLDVQ128 return true - case OpShiftLeftAndFillUpperFromInt64x4: + case OpShiftLeftConcatInt64x4: v.Op = OpAMD64VPSHLDVQ256 return true - case OpShiftLeftAndFillUpperFromInt64x8: + case OpShiftLeftConcatInt64x8: v.Op = OpAMD64VPSHLDVQ512 return true - case OpShiftLeftAndFillUpperFromMaskedInt16x16: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x16(v) - case OpShiftLeftAndFillUpperFromMaskedInt16x32: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x32(v) - case OpShiftLeftAndFillUpperFromMaskedInt16x8: - return 
rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x8(v) - case OpShiftLeftAndFillUpperFromMaskedInt32x16: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x16(v) - case OpShiftLeftAndFillUpperFromMaskedInt32x4: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x4(v) - case OpShiftLeftAndFillUpperFromMaskedInt32x8: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x8(v) - case OpShiftLeftAndFillUpperFromMaskedInt64x2: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x2(v) - case OpShiftLeftAndFillUpperFromMaskedInt64x4: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x4(v) - case OpShiftLeftAndFillUpperFromMaskedInt64x8: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x8(v) - case OpShiftLeftAndFillUpperFromMaskedUint16x16: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x16(v) - case OpShiftLeftAndFillUpperFromMaskedUint16x32: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x32(v) - case OpShiftLeftAndFillUpperFromMaskedUint16x8: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x8(v) - case OpShiftLeftAndFillUpperFromMaskedUint32x16: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x16(v) - case OpShiftLeftAndFillUpperFromMaskedUint32x4: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x4(v) - case OpShiftLeftAndFillUpperFromMaskedUint32x8: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x8(v) - case OpShiftLeftAndFillUpperFromMaskedUint64x2: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x2(v) - case OpShiftLeftAndFillUpperFromMaskedUint64x4: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x4(v) - case OpShiftLeftAndFillUpperFromMaskedUint64x8: - return rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x8(v) - case OpShiftLeftAndFillUpperFromUint16x16: + case OpShiftLeftConcatMaskedInt16x16: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v) + case OpShiftLeftConcatMaskedInt16x32: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v) + case OpShiftLeftConcatMaskedInt16x8: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v) + case OpShiftLeftConcatMaskedInt32x16: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v) + case OpShiftLeftConcatMaskedInt32x4: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v) + case OpShiftLeftConcatMaskedInt32x8: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v) + case OpShiftLeftConcatMaskedInt64x2: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v) + case OpShiftLeftConcatMaskedInt64x4: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v) + case OpShiftLeftConcatMaskedInt64x8: + return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v) + case OpShiftLeftConcatMaskedUint16x16: + return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v) + case OpShiftLeftConcatMaskedUint16x32: + return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v) + case OpShiftLeftConcatMaskedUint16x8: + return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v) + case OpShiftLeftConcatMaskedUint32x16: + return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v) + case OpShiftLeftConcatMaskedUint32x4: + return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v) + case OpShiftLeftConcatMaskedUint32x8: + return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v) + case OpShiftLeftConcatMaskedUint64x2: + return 
rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v) + case OpShiftLeftConcatMaskedUint64x4: + return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v) + case OpShiftLeftConcatMaskedUint64x8: + return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v) + case OpShiftLeftConcatUint16x16: v.Op = OpAMD64VPSHLDVW256 return true - case OpShiftLeftAndFillUpperFromUint16x32: + case OpShiftLeftConcatUint16x32: v.Op = OpAMD64VPSHLDVW512 return true - case OpShiftLeftAndFillUpperFromUint16x8: + case OpShiftLeftConcatUint16x8: v.Op = OpAMD64VPSHLDVW128 return true - case OpShiftLeftAndFillUpperFromUint32x16: + case OpShiftLeftConcatUint32x16: v.Op = OpAMD64VPSHLDVD512 return true - case OpShiftLeftAndFillUpperFromUint32x4: + case OpShiftLeftConcatUint32x4: v.Op = OpAMD64VPSHLDVD128 return true - case OpShiftLeftAndFillUpperFromUint32x8: + case OpShiftLeftConcatUint32x8: v.Op = OpAMD64VPSHLDVD256 return true - case OpShiftLeftAndFillUpperFromUint64x2: + case OpShiftLeftConcatUint64x2: v.Op = OpAMD64VPSHLDVQ128 return true - case OpShiftLeftAndFillUpperFromUint64x4: + case OpShiftLeftConcatUint64x4: v.Op = OpAMD64VPSHLDVQ256 return true - case OpShiftLeftAndFillUpperFromUint64x8: + case OpShiftLeftConcatUint64x8: v.Op = OpAMD64VPSHLDVQ512 return true case OpShiftLeftInt16x16: @@ -4983,94 +4983,94 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftLeftUint64x8: v.Op = OpAMD64VPSLLVQ512 return true - case OpShiftRightAndFillUpperFromInt16x16: + case OpShiftRightConcatInt16x16: v.Op = OpAMD64VPSHRDVW256 return true - case OpShiftRightAndFillUpperFromInt16x32: + case OpShiftRightConcatInt16x32: v.Op = OpAMD64VPSHRDVW512 return true - case OpShiftRightAndFillUpperFromInt16x8: + case OpShiftRightConcatInt16x8: v.Op = OpAMD64VPSHRDVW128 return true - case OpShiftRightAndFillUpperFromInt32x16: + case OpShiftRightConcatInt32x16: v.Op = OpAMD64VPSHRDVD512 return true - case OpShiftRightAndFillUpperFromInt32x4: + case OpShiftRightConcatInt32x4: v.Op = OpAMD64VPSHRDVD128 return true - case OpShiftRightAndFillUpperFromInt32x8: + case OpShiftRightConcatInt32x8: v.Op = OpAMD64VPSHRDVD256 return true - case OpShiftRightAndFillUpperFromInt64x2: + case OpShiftRightConcatInt64x2: v.Op = OpAMD64VPSHRDVQ128 return true - case OpShiftRightAndFillUpperFromInt64x4: + case OpShiftRightConcatInt64x4: v.Op = OpAMD64VPSHRDVQ256 return true - case OpShiftRightAndFillUpperFromInt64x8: + case OpShiftRightConcatInt64x8: v.Op = OpAMD64VPSHRDVQ512 return true - case OpShiftRightAndFillUpperFromMaskedInt16x16: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x16(v) - case OpShiftRightAndFillUpperFromMaskedInt16x32: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x32(v) - case OpShiftRightAndFillUpperFromMaskedInt16x8: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x8(v) - case OpShiftRightAndFillUpperFromMaskedInt32x16: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x16(v) - case OpShiftRightAndFillUpperFromMaskedInt32x4: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x4(v) - case OpShiftRightAndFillUpperFromMaskedInt32x8: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x8(v) - case OpShiftRightAndFillUpperFromMaskedInt64x2: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x2(v) - case OpShiftRightAndFillUpperFromMaskedInt64x4: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x4(v) - case OpShiftRightAndFillUpperFromMaskedInt64x8: - return 
rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x8(v) - case OpShiftRightAndFillUpperFromMaskedUint16x16: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x16(v) - case OpShiftRightAndFillUpperFromMaskedUint16x32: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x32(v) - case OpShiftRightAndFillUpperFromMaskedUint16x8: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x8(v) - case OpShiftRightAndFillUpperFromMaskedUint32x16: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x16(v) - case OpShiftRightAndFillUpperFromMaskedUint32x4: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x4(v) - case OpShiftRightAndFillUpperFromMaskedUint32x8: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x8(v) - case OpShiftRightAndFillUpperFromMaskedUint64x2: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x2(v) - case OpShiftRightAndFillUpperFromMaskedUint64x4: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x4(v) - case OpShiftRightAndFillUpperFromMaskedUint64x8: - return rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x8(v) - case OpShiftRightAndFillUpperFromUint16x16: + case OpShiftRightConcatMaskedInt16x16: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v) + case OpShiftRightConcatMaskedInt16x32: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v) + case OpShiftRightConcatMaskedInt16x8: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v) + case OpShiftRightConcatMaskedInt32x16: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v) + case OpShiftRightConcatMaskedInt32x4: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v) + case OpShiftRightConcatMaskedInt32x8: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v) + case OpShiftRightConcatMaskedInt64x2: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v) + case OpShiftRightConcatMaskedInt64x4: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v) + case OpShiftRightConcatMaskedInt64x8: + return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v) + case OpShiftRightConcatMaskedUint16x16: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v) + case OpShiftRightConcatMaskedUint16x32: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v) + case OpShiftRightConcatMaskedUint16x8: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v) + case OpShiftRightConcatMaskedUint32x16: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v) + case OpShiftRightConcatMaskedUint32x4: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v) + case OpShiftRightConcatMaskedUint32x8: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v) + case OpShiftRightConcatMaskedUint64x2: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v) + case OpShiftRightConcatMaskedUint64x4: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v) + case OpShiftRightConcatMaskedUint64x8: + return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v) + case OpShiftRightConcatUint16x16: v.Op = OpAMD64VPSHRDVW256 return true - case OpShiftRightAndFillUpperFromUint16x32: + case OpShiftRightConcatUint16x32: v.Op = OpAMD64VPSHRDVW512 return true - case OpShiftRightAndFillUpperFromUint16x8: + case OpShiftRightConcatUint16x8: v.Op = OpAMD64VPSHRDVW128 return true - case OpShiftRightAndFillUpperFromUint32x16: + case OpShiftRightConcatUint32x16: v.Op = OpAMD64VPSHRDVD512 return true - case 
OpShiftRightAndFillUpperFromUint32x4: + case OpShiftRightConcatUint32x4: v.Op = OpAMD64VPSHRDVD128 return true - case OpShiftRightAndFillUpperFromUint32x8: + case OpShiftRightConcatUint32x8: v.Op = OpAMD64VPSHRDVD256 return true - case OpShiftRightAndFillUpperFromUint64x2: + case OpShiftRightConcatUint64x2: v.Op = OpAMD64VPSHRDVQ128 return true - case OpShiftRightAndFillUpperFromUint64x4: + case OpShiftRightConcatUint64x4: v.Op = OpAMD64VPSHRDVQ256 return true - case OpShiftRightAndFillUpperFromUint64x8: + case OpShiftRightConcatUint64x8: v.Op = OpAMD64VPSHRDVQ512 return true case OpShiftRightInt16x16: @@ -50752,12 +50752,12 @@ func rewriteValueAMD64_OpSelectN(v *Value) bool { } return false } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt16x16 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50772,12 +50772,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x16(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt16x32 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50792,12 +50792,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x32(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt16x8 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50812,12 +50812,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt16x8(v *Value) boo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt32x16 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50832,12 +50832,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x16(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt32x4 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50852,12 +50852,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x4(v *Value) boo 
return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt32x8 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50872,12 +50872,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt32x8(v *Value) boo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt64x2 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50892,12 +50892,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x2(v *Value) boo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt64x4 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50912,12 +50912,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x4(v *Value) boo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedInt64x8 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50932,12 +50932,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedInt64x8(v *Value) boo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint16x16 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50952,12 +50952,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x16(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint16x32 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50972,12 +50972,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x32(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x8(v *Value) bool { +func 
rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint16x8 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -50992,12 +50992,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint16x8(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint32x16 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51012,12 +51012,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x16(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint32x4 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51032,12 +51032,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x4(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint32x8 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51052,12 +51052,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint32x8(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint64x2 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51072,12 +51072,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x2(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint64x4 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51092,12 +51092,12 @@ func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x4(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllLeftAndFillUpperFromMaskedUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] 
v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftAndFillUpperFromMaskedUint64x8 [a] x y mask) + // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51436,12 +51436,12 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt16x16 [a] x y mask) + // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask) // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51456,12 +51456,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x16(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt16x32 [a] x y mask) + // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask) // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51476,12 +51476,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x32(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt16x8 [a] x y mask) + // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask) // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51496,12 +51496,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt16x8(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt32x16 [a] x y mask) + // match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask) // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51516,12 +51516,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x16(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt32x4 [a] x y mask) + // match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask) // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51536,12 +51536,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x4(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt32x8 [a] x y mask) + // 
match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask) // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51556,12 +51556,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt32x8(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt64x2 [a] x y mask) + // match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask) // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51576,12 +51576,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x2(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt64x4 [a] x y mask) + // match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask) // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51596,12 +51596,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x4(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedInt64x8 [a] x y mask) + // match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask) // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51616,12 +51616,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedInt64x8(v *Value) bo return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint16x16 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask) // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51636,12 +51636,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x16(v *Value) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint16x32 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask) // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51656,12 +51656,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x32(v *Value) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint16x8 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask) // result: (VPSHRDWMasked128 [a] x 
y (VPMOVVec16x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51676,12 +51676,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint16x8(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint32x16 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask) // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51696,12 +51696,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x16(v *Value) return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint32x4 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask) // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51716,12 +51716,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x4(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint32x8 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask) // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51736,12 +51736,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint32x8(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint64x2 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask) // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51756,12 +51756,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x2(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint64x4 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint64x4 [a] x y mask) // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -51776,12 +51776,12 @@ func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x4(v *Value) b return true } } -func rewriteValueAMD64_OpShiftAllRightAndFillUpperFromMaskedUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllRightAndFillUpperFromMaskedUint64x8 [a] x y mask) + // match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask) // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { a := auxIntToInt8(v.AuxInt) @@ -52120,13 +52120,13 
@@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt16x16 x y z mask) + // match: (ShiftLeftConcatMaskedInt16x16 x y z mask) // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) for { x := v_0 @@ -52140,13 +52140,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x16(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt16x32 x y z mask) + // match: (ShiftLeftConcatMaskedInt16x32 x y z mask) // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) for { x := v_0 @@ -52160,13 +52160,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x32(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt16x8 x y z mask) + // match: (ShiftLeftConcatMaskedInt16x8 x y z mask) // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) for { x := v_0 @@ -52180,13 +52180,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt16x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt32x16 x y z mask) + // match: (ShiftLeftConcatMaskedInt32x16 x y z mask) // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 @@ -52200,13 +52200,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x16(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt32x4 x y z mask) + // match: (ShiftLeftConcatMaskedInt32x4 x y z mask) // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 @@ -52220,13 +52220,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt32x8 x y z mask) + // match: (ShiftLeftConcatMaskedInt32x8 x y z mask) // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 @@ -52240,13 +52240,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt32x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v *Value) 
bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt64x2 x y z mask) + // match: (ShiftLeftConcatMaskedInt64x2 x y z mask) // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) for { x := v_0 @@ -52260,13 +52260,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x2(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt64x4 x y z mask) + // match: (ShiftLeftConcatMaskedInt64x4 x y z mask) // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) for { x := v_0 @@ -52280,13 +52280,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x4(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedInt64x8 x y z mask) + // match: (ShiftLeftConcatMaskedInt64x8 x y z mask) // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) for { x := v_0 @@ -52300,13 +52300,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint16x16 x y z mask) + // match: (ShiftLeftConcatMaskedUint16x16 x y z mask) // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM mask)) for { x := v_0 @@ -52320,13 +52320,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x16(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint16x32 x y z mask) + // match: (ShiftLeftConcatMaskedUint16x32 x y z mask) // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM mask)) for { x := v_0 @@ -52340,13 +52340,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x32(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint16x8 x y z mask) + // match: (ShiftLeftConcatMaskedUint16x8 x y z mask) // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM mask)) for { x := v_0 @@ -52360,13 +52360,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint16x8(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint32x16 x y z mask) + // match: (ShiftLeftConcatMaskedUint32x16 x y z mask) // result: 
(VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 @@ -52380,13 +52380,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x16(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint32x4 x y z mask) + // match: (ShiftLeftConcatMaskedUint32x4 x y z mask) // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 @@ -52400,13 +52400,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x4(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint32x8 x y z mask) + // match: (ShiftLeftConcatMaskedUint32x8 x y z mask) // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 @@ -52420,13 +52420,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint32x8(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint64x2 x y z mask) + // match: (ShiftLeftConcatMaskedUint64x2 x y z mask) // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM mask)) for { x := v_0 @@ -52440,13 +52440,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x2(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint64x4 x y z mask) + // match: (ShiftLeftConcatMaskedUint64x4 x y z mask) // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM mask)) for { x := v_0 @@ -52460,13 +52460,13 @@ func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x4(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftLeftAndFillUpperFromMaskedUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftLeftAndFillUpperFromMaskedUint64x8 x y z mask) + // match: (ShiftLeftConcatMaskedUint64x8 x y z mask) // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM mask)) for { x := v_0 @@ -52804,13 +52804,13 @@ func rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt16x16 x y z mask) + // match: (ShiftRightConcatMaskedInt16x16 x y z mask) // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) for { x := v_0 @@ -52824,13 +52824,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x16(v *Value) bool return true } } -func 
rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt16x32 x y z mask) + // match: (ShiftRightConcatMaskedInt16x32 x y z mask) // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) for { x := v_0 @@ -52844,13 +52844,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x32(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt16x8 x y z mask) + // match: (ShiftRightConcatMaskedInt16x8 x y z mask) // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) for { x := v_0 @@ -52864,13 +52864,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt16x8(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt32x16 x y z mask) + // match: (ShiftRightConcatMaskedInt32x16 x y z mask) // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 @@ -52884,13 +52884,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x16(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt32x4 x y z mask) + // match: (ShiftRightConcatMaskedInt32x4 x y z mask) // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM mask)) for { x := v_0 @@ -52904,13 +52904,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x4(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt32x8 x y z mask) + // match: (ShiftRightConcatMaskedInt32x8 x y z mask) // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 @@ -52924,13 +52924,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt32x8(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt64x2 x y z mask) + // match: (ShiftRightConcatMaskedInt64x2 x y z mask) // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) for { x := v_0 @@ -52944,13 +52944,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x2(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] 
b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt64x4 x y z mask) + // match: (ShiftRightConcatMaskedInt64x4 x y z mask) // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) for { x := v_0 @@ -52964,13 +52964,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x4(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedInt64x8 x y z mask) + // match: (ShiftRightConcatMaskedInt64x8 x y z mask) // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) for { x := v_0 @@ -52984,13 +52984,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedInt64x8(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint16x16 x y z mask) + // match: (ShiftRightConcatMaskedUint16x16 x y z mask) // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM mask)) for { x := v_0 @@ -53004,13 +53004,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x16(v *Value) boo return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint16x32 x y z mask) + // match: (ShiftRightConcatMaskedUint16x32 x y z mask) // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM mask)) for { x := v_0 @@ -53024,13 +53024,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x32(v *Value) boo return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint16x8 x y z mask) + // match: (ShiftRightConcatMaskedUint16x8 x y z mask) // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM mask)) for { x := v_0 @@ -53044,13 +53044,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint16x8(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint32x16 x y z mask) + // match: (ShiftRightConcatMaskedUint32x16 x y z mask) // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM mask)) for { x := v_0 @@ -53064,13 +53064,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x16(v *Value) boo return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint32x4 x y z mask) + // match: (ShiftRightConcatMaskedUint32x4 x y z mask) // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM 
mask)) for { x := v_0 @@ -53084,13 +53084,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x4(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint32x8 x y z mask) + // match: (ShiftRightConcatMaskedUint32x8 x y z mask) // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM mask)) for { x := v_0 @@ -53104,13 +53104,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint32x8(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint64x2 x y z mask) + // match: (ShiftRightConcatMaskedUint64x2 x y z mask) // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM mask)) for { x := v_0 @@ -53124,13 +53124,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x2(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint64x4 x y z mask) + // match: (ShiftRightConcatMaskedUint64x4 x y z mask) // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM mask)) for { x := v_0 @@ -53144,13 +53144,13 @@ func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x4(v *Value) bool return true } } -func rewriteValueAMD64_OpShiftRightAndFillUpperFromMaskedUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftRightAndFillUpperFromMaskedUint64x8 x y z mask) + // match: (ShiftRightConcatMaskedUint64x8 x y z mask) // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM mask)) for { x := v_0 diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index a30144cbd1..d6c5b889ed 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -1412,42 +1412,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllLeftAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllLeftAndFillUpperFromMasked", 
opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllLeftAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllLeftAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeftConcat", 
opLen2Imm8(ssa.OpShiftAllLeftConcatUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, 
"Uint64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) @@ -1484,42 +1484,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFrom", opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFrom", 
opLen2Imm8(ssa.OpShiftAllRightAndFillUpperFromUint64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftAllRightAndFillUpperFromMasked", opLen3Imm8(ssa.OpShiftAllRightAndFillUpperFromMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt32x4, types.TypeVec128, 0), sys.AMD64) + 
addF(simdPackage, "Int32x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64) + 
addF(simdPackage, "Uint32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x32, types.TypeVec512), sys.AMD64) @@ -1556,42 +1556,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftLeft", opLen2(ssa.OpShiftLeftUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, 
"Uint32x16.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftLeftAndFillUpperFrom", opLen3(ssa.OpShiftLeftAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftLeftAndFillUpperFromMasked", opLen4(ssa.OpShiftLeftAndFillUpperFromMaskedUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, 
"Int32x4.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftLeftConcatMasked", 
opLen4(ssa.OpShiftLeftConcatMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x32, types.TypeVec512), sys.AMD64) @@ -1628,42 +1628,42 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x2.ShiftRight", opLen2(ssa.OpShiftRightUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.ShiftRight", opLen2(ssa.OpShiftRightUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.ShiftRight", opLen2(ssa.OpShiftRightUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x4, types.TypeVec256), 
sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightAndFillUpperFrom", opLen3(ssa.OpShiftRightAndFillUpperFromUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x32.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int32x16.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int64x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint16x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x32.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint32x16.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint64x8.ShiftRightAndFillUpperFromMasked", opLen4(ssa.OpShiftRightAndFillUpperFromMaskedUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt32x16, types.TypeVec512), sys.AMD64) + 
addF(simdPackage, "Int64x2.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.ShiftRightConcatMasked", 
opLen4(ssa.OpShiftRightConcatMaskedUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x32, types.TypeVec512), sys.AMD64) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 8d94136090..f88410af43 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -9259,155 +9259,155 @@ func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4 // Asm: VPSLLQ, CPU Feature: AVX512F func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8 -/* ShiftAllLeftAndFillUpperFrom */ +/* ShiftAllLeftConcat */ -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x8) Int16x8 +func (x Int16x8) ShiftAllLeftConcat(shift uint8, y Int16x8) Int16x8 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x16) Int16x16 +func (x Int16x16) ShiftAllLeftConcat(shift uint8, y Int16x16) Int16x16 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int16x32) Int16x32 +func (x Int16x32) ShiftAllLeftConcat(shift uint8, y Int16x32) Int16x32 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x4) Int32x4 +func (x Int32x4) ShiftAllLeftConcat(shift uint8, y Int32x4) Int32x4 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x8) Int32x8 +func (x Int32x8) ShiftAllLeftConcat(shift uint8, y Int32x8) Int32x8 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int32x16) Int32x16 +func (x Int32x16) ShiftAllLeftConcat(shift uint8, y Int32x16) Int32x16 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x2) Int64x2 +func (x Int64x2) ShiftAllLeftConcat(shift uint8, y Int64x2) Int64x2 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x4) Int64x4 +func (x Int64x4) ShiftAllLeftConcat(shift uint8, y Int64x4) Int64x4 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Int64x8) Int64x8 +func (x Int64x8) ShiftAllLeftConcat(shift uint8, y Int64x8) Int64x8 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x8) Uint16x8 +func (x Uint16x8) ShiftAllLeftConcat(shift uint8, y Uint16x8) Uint16x8 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x16) Uint16x16 +func (x Uint16x16) ShiftAllLeftConcat(shift uint8, y Uint16x16) Uint16x16 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint16x32) Uint16x32 +func (x Uint16x32) ShiftAllLeftConcat(shift uint8, y Uint16x32) Uint16x32 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x4) Uint32x4 +func (x Uint32x4) ShiftAllLeftConcat(shift uint8, y Uint32x4) Uint32x4 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x8) Uint32x8 +func (x Uint32x8) ShiftAllLeftConcat(shift uint8, y Uint32x8) Uint32x8 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint32x16) Uint32x16 +func (x Uint32x16) ShiftAllLeftConcat(shift uint8, y Uint32x16) Uint32x16 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x2) Uint64x2 +func (x Uint64x2) ShiftAllLeftConcat(shift uint8, y Uint64x2) Uint64x2 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x4) Uint64x4 +func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4 -// ShiftAllLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8 +func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8 -/* ShiftAllLeftAndFillUpperFromMasked */ +/* ShiftAllLeftConcatMasked */ -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9415,9 +9415,9 @@ func (x Uint64x8) ShiftAllLeftAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8 // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
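For reference, the per-lane behavior that the ShiftAllLeftConcat doc comments above describe can be sketched in scalar Go. This is not part of the CL; it assumes 32-bit lanes (the VPSHLDD form) and that the effective count is the immediate reduced modulo the element width. The word and quadword forms follow the same pattern with 16- and 64-bit lanes.

    // shiftAllLeftConcat32 models one 32-bit lane of Uint32x4.ShiftAllLeftConcat:
    // the lane of x is shifted left by c and the vacated low bits are filled
    // from the top bits of the corresponding lane of y.
    func shiftAllLeftConcat32(x, y uint32, shift uint8) uint32 {
    	c := uint(shift) % 32 // effective count; c == 0 leaves x unchanged
    	if c == 0 {
    		return x
    	}
    	// Equivalent to taking the high 32 bits of ((uint64(x)<<32 | uint64(y)) << c).
    	return x<<c | y>>(32-c)
    }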
// // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 +func (x Int16x8) ShiftAllLeftConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9425,9 +9425,9 @@ func (x Int16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x8, mask // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 +func (x Int16x16) ShiftAllLeftConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9435,9 +9435,9 @@ func (x Int16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x16, ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 +func (x Int16x32) ShiftAllLeftConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9445,9 +9445,9 @@ func (x Int16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int16x32, ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 +func (x Int32x4) ShiftAllLeftConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9455,9 +9455,9 @@ func (x Int32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x4, mask // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 +func (x Int32x8) ShiftAllLeftConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9465,9 +9465,9 @@ func (x Int32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x8, mask // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 +func (x Int32x16) ShiftAllLeftConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9475,9 +9475,9 @@ func (x Int32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int32x16, ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 +func (x Int64x2) ShiftAllLeftConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9485,9 +9485,9 @@ func (x Int64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x2, mask // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 +func (x Int64x4) ShiftAllLeftConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9495,9 +9495,9 @@ func (x Int64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x4, mask // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 +func (x Int64x8) ShiftAllLeftConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9505,9 +9505,9 @@ func (x Int64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Int64x8, mask // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 +func (x Uint16x8) ShiftAllLeftConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9515,9 +9515,9 @@ func (x Uint16x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x8, ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 +func (x Uint16x16) ShiftAllLeftConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9525,9 +9525,9 @@ func (x Uint16x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x16, // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 +func (x Uint16x32) ShiftAllLeftConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9535,9 +9535,9 @@ func (x Uint16x32) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint16x32, // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 +func (x Uint32x4) ShiftAllLeftConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9545,9 +9545,9 @@ func (x Uint32x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x4, ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 +func (x Uint32x8) ShiftAllLeftConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9555,9 +9555,9 @@ func (x Uint32x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x8, ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 +func (x Uint32x16) ShiftAllLeftConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9565,9 +9565,9 @@ func (x Uint32x16) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint32x16, // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 +func (x Uint64x2) ShiftAllLeftConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9575,9 +9575,9 @@ func (x Uint64x2) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x2, ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 +func (x Uint64x4) ShiftAllLeftConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 -// ShiftAllLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9585,7 +9585,7 @@ func (x Uint64x4) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x4, ma // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllLeftAndFillUpperFromMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 +func (x Uint64x8) ShiftAllLeftConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 /* ShiftAllLeftMasked */ @@ -9807,155 +9807,155 @@ func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4 // Asm: VPSRLQ, CPU Feature: AVX512F func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8 -/* ShiftAllRightAndFillUpperFrom */ +/* ShiftAllRightConcat */ -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x8) Int16x8 +func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x16) Int16x16 +func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
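The Masked variants apply the same per-lane operation under a write mask. The doc comments only say the operation "is applied selectively under a write mask"; the sketch below assumes inactive lanes come back zeroed, in the style of AVX-512 zeroing masking, which is an assumption and not something this CL states.

    // shiftAllLeftConcatMasked32 models one 32-bit lane of ShiftAllLeftConcatMasked.
    // active stands in for the corresponding mask element.
    func shiftAllLeftConcatMasked32(x, y uint32, shift uint8, active bool) uint32 {
    	if !active {
    		return 0 // assumed: inactive lanes are zeroed
    	}
    	c := uint(shift) % 32
    	if c == 0 {
    		return x
    	}
    	return x<<c | y>>(32-c)
    }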
// // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllRightAndFillUpperFrom(shift uint8, y Int16x32) Int16x32 +func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x4) Int32x4 +func (x Int32x4) ShiftAllRightConcat(shift uint8, y Int32x4) Int32x4 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x8) Int32x8 +func (x Int32x8) ShiftAllRightConcat(shift uint8, y Int32x8) Int32x8 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Int32x16) Int32x16 +func (x Int32x16) ShiftAllRightConcat(shift uint8, y Int32x16) Int32x16 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x2) Int64x2 +func (x Int64x2) ShiftAllRightConcat(shift uint8, y Int64x2) Int64x2 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x4) Int64x4 +func (x Int64x4) ShiftAllRightConcat(shift uint8, y Int64x4) Int64x4 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Int64x8) Int64x8 +func (x Int64x8) ShiftAllRightConcat(shift uint8, y Int64x8) Int64x8 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x8) Uint16x8 +func (x Uint16x8) ShiftAllRightConcat(shift uint8, y Uint16x8) Uint16x8 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x16) Uint16x16 +func (x Uint16x16) ShiftAllRightConcat(shift uint8, y Uint16x16) Uint16x16 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint16x32) Uint16x32 +func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x4) Uint32x4 +func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x8) Uint32x8 +func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint32x16) Uint32x16 +func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x2) Uint64x2 +func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x4) Uint64x4 +func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4 -// ShiftAllRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x8 +func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8 -/* ShiftAllRightAndFillUpperFromMasked */ +/* ShiftAllRightConcatMasked */ -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9963,9 +9963,9 @@ func (x Uint64x8) ShiftAllRightAndFillUpperFrom(shift uint8, y Uint64x8) Uint64x // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 +func (x Int16x8) ShiftAllRightConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9973,9 +9973,9 @@ func (x Int16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x8, mas // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 +func (x Int16x16) ShiftAllRightConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9983,9 +9983,9 @@ func (x Int16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x16, m // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 +func (x Int16x32) ShiftAllRightConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -9993,9 +9993,9 @@ func (x Int16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int16x32, m // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
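ShiftAllRightConcat is the mirror image: the lane of x is shifted right and the vacated high bits are filled from the low bits of y. A scalar sketch for the 32-bit (VPSHRDD) form, under the same assumptions as the left-shift sketch above; for counts strictly between 0 and the element width this yields the same value as ShiftAllLeftConcat with the operands swapped and the count replaced by (width - count).

    // shiftAllRightConcat32 models one 32-bit lane of Uint32x4.ShiftAllRightConcat:
    // the lane of x is shifted right by c and the vacated high bits are filled
    // from the low bits of the corresponding lane of y.
    func shiftAllRightConcat32(x, y uint32, shift uint8) uint32 {
    	c := uint(shift) % 32 // effective count; c == 0 leaves x unchanged
    	if c == 0 {
    		return x
    	}
    	// Equivalent to taking the low 32 bits of ((uint64(y)<<32 | uint64(x)) >> c).
    	return x>>c | y<<(32-c)
    }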
// // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 +func (x Int32x4) ShiftAllRightConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10003,9 +10003,9 @@ func (x Int32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x4, mas // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 +func (x Int32x8) ShiftAllRightConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10013,9 +10013,9 @@ func (x Int32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x8, mas // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 +func (x Int32x16) ShiftAllRightConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10023,9 +10023,9 @@ func (x Int32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int32x16, m // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 +func (x Int64x2) ShiftAllRightConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10033,9 +10033,9 @@ func (x Int64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x2, mas // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 +func (x Int64x4) ShiftAllRightConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10043,9 +10043,9 @@ func (x Int64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x4, mas // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 +func (x Int64x8) ShiftAllRightConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10053,9 +10053,9 @@ func (x Int64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Int64x8, mas // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 +func (x Uint16x8) ShiftAllRightConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10063,9 +10063,9 @@ func (x Uint16x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x8, m // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 +func (x Uint16x16) ShiftAllRightConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10073,9 +10073,9 @@ func (x Uint16x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x16, // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 +func (x Uint16x32) ShiftAllRightConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10083,9 +10083,9 @@ func (x Uint16x32) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint16x32, // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 +func (x Uint32x4) ShiftAllRightConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10093,9 +10093,9 @@ func (x Uint32x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x4, m // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 +func (x Uint32x8) ShiftAllRightConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10103,9 +10103,9 @@ func (x Uint32x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x8, m // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 +func (x Uint32x16) ShiftAllRightConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10113,9 +10113,9 @@ func (x Uint32x16) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint32x16, // shift is expected to be a constant, non-constant value will trigger a runtime panic. 
// // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 +func (x Uint64x2) ShiftAllRightConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10123,9 +10123,9 @@ func (x Uint64x2) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x2, m // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 +func (x Uint64x4) ShiftAllRightConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4 -// ShiftAllRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. @@ -10133,7 +10133,7 @@ func (x Uint64x4) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x4, m // shift is expected to be a constant, non-constant value will trigger a runtime panic. // // Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftAllRightAndFillUpperFromMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 +func (x Uint64x8) ShiftAllRightConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8 /* ShiftAllRightMasked */ @@ -10355,261 +10355,261 @@ func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4 // Asm: VPSLLVQ, CPU Feature: AVX512F func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8 -/* ShiftLeftAndFillUpperFrom */ +/* ShiftLeftConcat */ -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftLeftAndFillUpperFrom(y Int16x8, z Int16x8) Int16x8 +func (x Int16x8) ShiftLeftConcat(y Int16x8, z Int16x8) Int16x8 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
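As a rough scalar sketch of the ShiftAllRightConcat semantics documented above, here is one 32-bit lane of the VPSHRDD case; the helper name and the modulo reduction of the count are illustrative assumptions, guided only by the doc comments:

func shiftAllRightConcatLane32(x, y uint32, shift uint8) uint32 {
	s := uint(shift) & 31             // doc comments: only the low bits of the count are used
	wide := uint64(y)<<32 | uint64(x) // y sits above x in the concatenation
	return uint32(wide >> s)          // keep the low 32 bits, i.e. (x>>s)|(y<<(32-s)) for nonzero s
}

The ShiftAllLeftConcat forms mirror this by shifting the concatenation left and keeping its upper half, and the Masked variants apply the same computation under a per-lane write mask.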
// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftLeftAndFillUpperFrom(y Int16x16, z Int16x16) Int16x16 +func (x Int16x16) ShiftLeftConcat(y Int16x16, z Int16x16) Int16x16 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftLeftAndFillUpperFrom(y Int16x32, z Int16x32) Int16x32 +func (x Int16x32) ShiftLeftConcat(y Int16x32, z Int16x32) Int16x32 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftLeftAndFillUpperFrom(y Int32x4, z Int32x4) Int32x4 +func (x Int32x4) ShiftLeftConcat(y Int32x4, z Int32x4) Int32x4 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftLeftAndFillUpperFrom(y Int32x8, z Int32x8) Int32x8 +func (x Int32x8) ShiftLeftConcat(y Int32x8, z Int32x8) Int32x8 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftLeftAndFillUpperFrom(y Int32x16, z Int32x16) Int32x16 +func (x Int32x16) ShiftLeftConcat(y Int32x16, z Int32x16) Int32x16 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftLeftAndFillUpperFrom(y Int64x2, z Int64x2) Int64x2 +func (x Int64x2) ShiftLeftConcat(y Int64x2, z Int64x2) Int64x2 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftLeftAndFillUpperFrom(y Int64x4, z Int64x4) Int64x4 +func (x Int64x4) ShiftLeftConcat(y Int64x4, z Int64x4) Int64x4 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftLeftAndFillUpperFrom(y Int64x8, z Int64x8) Int64x8 +func (x Int64x8) ShiftLeftConcat(y Int64x8, z Int64x8) Int64x8 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftLeftAndFillUpperFrom(y Uint16x8, z Uint16x8) Uint16x8 +func (x Uint16x8) ShiftLeftConcat(y Uint16x8, z Uint16x8) Uint16x8 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftLeftAndFillUpperFrom(y Uint16x16, z Uint16x16) Uint16x16 +func (x Uint16x16) ShiftLeftConcat(y Uint16x16, z Uint16x16) Uint16x16 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftLeftAndFillUpperFrom(y Uint16x32, z Uint16x32) Uint16x32 +func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftLeftAndFillUpperFrom(y Uint32x4, z Uint32x4) Uint32x4 +func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. 
// // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftLeftAndFillUpperFrom(y Uint32x8, z Uint32x8) Uint32x8 +func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftLeftAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16 +func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftLeftAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2 +func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftLeftAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4 +func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4 -// ShiftLeftAndFillUpperFrom shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftLeftAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8 +func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8 -/* ShiftLeftAndFillUpperFromMasked */ +/* ShiftLeftConcatMasked */ -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftLeftAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 +func (x Int16x8) ShiftLeftConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. 
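The ShiftLeftConcat methods listed above take per-lane shift counts from y instead of an immediate. A minimal sketch over four 32-bit lanes, assuming the same funnel-shift interpretation; the array layout and helper name are not from the simd package:

func shiftLeftConcat32x4(x, y, z [4]uint32) [4]uint32 {
	var r [4]uint32
	for i := range x {
		s := uint(y[i]) & 31                    // per-lane count, low bits only
		wide := uint64(x[i])<<32 | uint64(z[i]) // x above z
		r[i] = uint32(wide << s >> 32)          // upper 32 bits of the shifted pair
	}
	return r
}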
// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftLeftAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 +func (x Int16x16) ShiftLeftConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftLeftAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 +func (x Int16x32) ShiftLeftConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftLeftAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 +func (x Int32x4) ShiftLeftConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftLeftAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 +func (x Int32x8) ShiftLeftConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftLeftAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 +func (x Int32x16) ShiftLeftConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. 
// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftLeftAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 +func (x Int64x2) ShiftLeftConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftLeftAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 +func (x Int64x4) ShiftLeftConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftLeftAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 +func (x Int64x8) ShiftLeftConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftLeftAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 +func (x Uint16x8) ShiftLeftConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftLeftAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 +func (x Uint16x16) ShiftLeftConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. 
// // Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftLeftAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 +func (x Uint16x32) ShiftLeftConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftLeftAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 +func (x Uint32x4) ShiftLeftConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftLeftAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 +func (x Uint32x8) ShiftLeftConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftLeftAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 +func (x Uint32x16) ShiftLeftConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftLeftAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 +func (x Uint64x2) ShiftLeftConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. 
// // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftLeftAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 +func (x Uint64x4) ShiftLeftConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 -// ShiftLeftAndFillUpperFromMasked shifts each element of x to the left by the number of bits specified by the +// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftLeftAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 +func (x Uint64x8) ShiftLeftConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 /* ShiftLeftMasked */ @@ -10831,261 +10831,261 @@ func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4 // Asm: VPSRLVQ, CPU Feature: AVX512F func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8 -/* ShiftRightAndFillUpperFrom */ +/* ShiftRightConcat */ -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftRightAndFillUpperFrom(y Int16x8, z Int16x8) Int16x8 +func (x Int16x8) ShiftRightConcat(y Int16x8, z Int16x8) Int16x8 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftRightAndFillUpperFrom(y Int16x16, z Int16x16) Int16x16 +func (x Int16x16) ShiftRightConcat(y Int16x16, z Int16x16) Int16x16 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftRightAndFillUpperFrom(y Int16x32, z Int16x32) Int16x32 +func (x Int16x32) ShiftRightConcat(y Int16x32, z Int16x32) Int16x32 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
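For the Masked forms above, the write mask selects which lanes receive the funnel-shift result. The doc comments do not say what the unselected lanes hold, so the zeroing in this sketch is an assumption, as is the helper name:

func shiftLeftConcatMasked32x4(x, y, z [4]uint32, mask [4]bool) [4]uint32 {
	var r [4]uint32
	for i := range x {
		if !mask[i] {
			continue // assumption: unselected lanes are left zero
		}
		s := uint(y[i]) & 31
		r[i] = uint32((uint64(x[i])<<32 | uint64(z[i])) << s >> 32)
	}
	return r
}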
// // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftRightAndFillUpperFrom(y Int32x4, z Int32x4) Int32x4 +func (x Int32x4) ShiftRightConcat(y Int32x4, z Int32x4) Int32x4 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftRightAndFillUpperFrom(y Int32x8, z Int32x8) Int32x8 +func (x Int32x8) ShiftRightConcat(y Int32x8, z Int32x8) Int32x8 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftRightAndFillUpperFrom(y Int32x16, z Int32x16) Int32x16 +func (x Int32x16) ShiftRightConcat(y Int32x16, z Int32x16) Int32x16 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftRightAndFillUpperFrom(y Int64x2, z Int64x2) Int64x2 +func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftRightAndFillUpperFrom(y Int64x4, z Int64x4) Int64x4 +func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftRightAndFillUpperFrom(y Int64x8, z Int64x8) Int64x8 +func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
// // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftRightAndFillUpperFrom(y Uint16x8, z Uint16x8) Uint16x8 +func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftRightAndFillUpperFrom(y Uint16x16, z Uint16x16) Uint16x16 +func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftRightAndFillUpperFrom(y Uint16x32, z Uint16x32) Uint16x32 +func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftRightAndFillUpperFrom(y Uint32x4, z Uint32x4) Uint32x4 +func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftRightAndFillUpperFrom(y Uint32x8, z Uint32x8) Uint32x8 +func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftRightAndFillUpperFrom(y Uint32x16, z Uint32x16) Uint32x16 +func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. 
// // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftRightAndFillUpperFrom(y Uint64x2, z Uint64x2) Uint64x2 +func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftRightAndFillUpperFrom(y Uint64x4, z Uint64x4) Uint64x4 +func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4 -// ShiftRightAndFillUpperFrom shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftRightAndFillUpperFrom(y Uint64x8, z Uint64x8) Uint64x8 +func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8 -/* ShiftRightAndFillUpperFromMasked */ +/* ShiftRightConcatMasked */ -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x8) ShiftRightAndFillUpperFromMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 +func (x Int16x8) ShiftRightConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x16) ShiftRightAndFillUpperFromMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 +func (x Int16x16) ShiftRightConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. 
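ShiftRightConcat, documented above, is the variable-count right funnel shift (VPSHRDV*). One 32-bit lane as a sketch, with an illustrative helper name:

func shiftRightConcatLane32(x, z, count uint32) uint32 {
	s := uint(count) & 31                           // per-lane count, low bits only
	return uint32((uint64(z)<<32 | uint64(x)) >> s) // z above x, keep the low 32 bits
}

For counts strictly between zero and the lane width this equals the left form with the operands swapped and the count complemented, the usual funnel-shift duality.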
// // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Int16x32) ShiftRightAndFillUpperFromMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 +func (x Int16x32) ShiftRightConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x4) ShiftRightAndFillUpperFromMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 +func (x Int32x4) ShiftRightConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x8) ShiftRightAndFillUpperFromMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 +func (x Int32x8) ShiftRightConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Int32x16) ShiftRightAndFillUpperFromMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 +func (x Int32x16) ShiftRightConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x2) ShiftRightAndFillUpperFromMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 +func (x Int64x2) ShiftRightConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. 
// // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x4) ShiftRightAndFillUpperFromMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 +func (x Int64x4) ShiftRightConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Int64x8) ShiftRightAndFillUpperFromMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 +func (x Int64x8) ShiftRightConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x8) ShiftRightAndFillUpperFromMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 +func (x Uint16x8) ShiftRightConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x16) ShiftRightAndFillUpperFromMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 +func (x Uint16x16) ShiftRightConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 -func (x Uint16x32) ShiftRightAndFillUpperFromMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 +func (x Uint16x32) ShiftRightConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. 
// // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x4) ShiftRightAndFillUpperFromMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 +func (x Uint32x4) ShiftRightConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x8) ShiftRightAndFillUpperFromMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 +func (x Uint32x8) ShiftRightConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 -func (x Uint32x16) ShiftRightAndFillUpperFromMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 +func (x Uint32x16) ShiftRightConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x2) ShiftRightAndFillUpperFromMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 +func (x Uint64x2) ShiftRightConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. // // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x4) ShiftRightAndFillUpperFromMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 +func (x Uint64x4) ShiftRightConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4 -// ShiftRightAndFillUpperFromMasked shifts each element of x to the right by the number of bits specified by the +// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. // // This operation is applied selectively under a write mask. 
// // Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 -func (x Uint64x8) ShiftRightAndFillUpperFromMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 +func (x Uint64x8) ShiftRightConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8 /* ShiftRightMasked */
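One consequence of the concat formulation worth noting: passing the same vector as both data operands should give a per-lane rotate, e.g. x.ShiftAllRightConcat(shift, x) for the signature shown earlier in this diff. A scalar check of that identity for a 32-bit lane, with an illustrative helper name:

func rotateRightViaConcat32(x uint32, shift uint8) uint32 {
	s := uint(shift) & 31
	return uint32((uint64(x)<<32 | uint64(x)) >> s) // same lane in both halves, so a rotate right by s
}

This agrees with math/bits.RotateLeft32(x, -int(s)).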