Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: generated code from 'fix generated rules for shifts'
author: David Chase <drchase@google.com>
Tue, 12 Aug 2025 21:01:55 +0000 (17:01 -0400)
committer: David Chase <drchase@google.com>
Wed, 13 Aug 2025 17:28:09 +0000 (10:28 -0700)
This code is generated by simdgen CL 695455.

Change-Id: I5afdc209a50b49d68e120130e0578e4666bf8749
Reviewed-on: https://go-review.googlesource.com/c/go/+/695475
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/rewriteAMD64.go

index abfa10020dec49ad524137187f6fb4a65819ea41..80cddaae79e2a141448be646838053f66fcf38ea 100644 (file)
 (SetLoUint32x16 x y) => (VINSERTI64X4512 [0] x y)
 (SetLoUint64x4 x y) => (VINSERTI128256 [0] x y)
 (SetLoUint64x8 x y) => (VINSERTI64X4512 [0] x y)
-(ShiftAllLeftInt16x8 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
-(ShiftAllLeftInt16x8 x y) => (VPSLLW128 x y)
-(ShiftAllLeftInt16x16 x (MOVQconst [c])) => (VPSLLW256const [uint8(c)] x)
-(ShiftAllLeftInt16x16 x y) => (VPSLLW256 x y)
-(ShiftAllLeftInt16x32 x (MOVQconst [c])) => (VPSLLW512const [uint8(c)] x)
-(ShiftAllLeftInt16x32 x y) => (VPSLLW512 x y)
-(ShiftAllLeftInt32x4 x (MOVQconst [c])) => (VPSLLD128const [uint8(c)] x)
-(ShiftAllLeftInt32x4 x y) => (VPSLLD128 x y)
-(ShiftAllLeftInt32x8 x (MOVQconst [c])) => (VPSLLD256const [uint8(c)] x)
-(ShiftAllLeftInt32x8 x y) => (VPSLLD256 x y)
-(ShiftAllLeftInt32x16 x (MOVQconst [c])) => (VPSLLD512const [uint8(c)] x)
-(ShiftAllLeftInt32x16 x y) => (VPSLLD512 x y)
-(ShiftAllLeftInt64x2 x (MOVQconst [c])) => (VPSLLQ128const [uint8(c)] x)
-(ShiftAllLeftInt64x2 x y) => (VPSLLQ128 x y)
-(ShiftAllLeftInt64x4 x (MOVQconst [c])) => (VPSLLQ256const [uint8(c)] x)
-(ShiftAllLeftInt64x4 x y) => (VPSLLQ256 x y)
-(ShiftAllLeftInt64x8 x (MOVQconst [c])) => (VPSLLQ512const [uint8(c)] x)
-(ShiftAllLeftInt64x8 x y) => (VPSLLQ512 x y)
-(ShiftAllLeftUint16x8 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
-(ShiftAllLeftUint16x8 x y) => (VPSLLW128 x y)
-(ShiftAllLeftUint16x16 x (MOVQconst [c])) => (VPSLLW256const [uint8(c)] x)
-(ShiftAllLeftUint16x16 x y) => (VPSLLW256 x y)
-(ShiftAllLeftUint16x32 x (MOVQconst [c])) => (VPSLLW512const [uint8(c)] x)
-(ShiftAllLeftUint16x32 x y) => (VPSLLW512 x y)
-(ShiftAllLeftUint32x4 x (MOVQconst [c])) => (VPSLLD128const [uint8(c)] x)
-(ShiftAllLeftUint32x4 x y) => (VPSLLD128 x y)
-(ShiftAllLeftUint32x8 x (MOVQconst [c])) => (VPSLLD256const [uint8(c)] x)
-(ShiftAllLeftUint32x8 x y) => (VPSLLD256 x y)
-(ShiftAllLeftUint32x16 x (MOVQconst [c])) => (VPSLLD512const [uint8(c)] x)
-(ShiftAllLeftUint32x16 x y) => (VPSLLD512 x y)
-(ShiftAllLeftUint64x2 x (MOVQconst [c])) => (VPSLLQ128const [uint8(c)] x)
-(ShiftAllLeftUint64x2 x y) => (VPSLLQ128 x y)
-(ShiftAllLeftUint64x4 x (MOVQconst [c])) => (VPSLLQ256const [uint8(c)] x)
-(ShiftAllLeftUint64x4 x y) => (VPSLLQ256 x y)
-(ShiftAllLeftUint64x8 x (MOVQconst [c])) => (VPSLLQ512const [uint8(c)] x)
-(ShiftAllLeftUint64x8 x y) => (VPSLLQ512 x y)
+(ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...)
+(VPSLLW128 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
+(ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...)
+(VPSLLW256 x (MOVQconst [c])) => (VPSLLW256const [uint8(c)] x)
+(ShiftAllLeftInt16x32 ...) => (VPSLLW512 ...)
+(VPSLLW512 x (MOVQconst [c])) => (VPSLLW512const [uint8(c)] x)
+(ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...)
+(VPSLLD128 x (MOVQconst [c])) => (VPSLLD128const [uint8(c)] x)
+(ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...)
+(VPSLLD256 x (MOVQconst [c])) => (VPSLLD256const [uint8(c)] x)
+(ShiftAllLeftInt32x16 ...) => (VPSLLD512 ...)
+(VPSLLD512 x (MOVQconst [c])) => (VPSLLD512const [uint8(c)] x)
+(ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...)
+(VPSLLQ128 x (MOVQconst [c])) => (VPSLLQ128const [uint8(c)] x)
+(ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...)
+(VPSLLQ256 x (MOVQconst [c])) => (VPSLLQ256const [uint8(c)] x)
+(ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...)
+(VPSLLQ512 x (MOVQconst [c])) => (VPSLLQ512const [uint8(c)] x)
+(ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...)
+(ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...)
+(ShiftAllLeftUint16x32 ...) => (VPSLLW512 ...)
+(ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...)
+(ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...)
+(ShiftAllLeftUint32x16 ...) => (VPSLLD512 ...)
+(ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...)
+(ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...)
+(ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...)
 (ShiftAllLeftConcatInt16x8 ...) => (VPSHLDW128 ...)
 (ShiftAllLeftConcatInt16x16 ...) => (VPSHLDW256 ...)
 (ShiftAllLeftConcatInt16x32 ...) => (VPSHLDW512 ...)
 (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPSLLWMasked128 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt16x16 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPSLLWMasked256 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt16x32 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(VPSLLWMasked512 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt32x4 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPSLLDMasked128 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt32x8 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPSLLDMasked256 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt32x16 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(VPSLLDMasked512 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt64x2 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPSLLQMasked128 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt64x4 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPSLLQMasked256 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedInt64x8 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(VPSLLQMasked512 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x mask)
 (ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x16 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x32 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x4 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x8 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x16 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x2 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x4 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x8 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightInt16x8 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x)
-(ShiftAllRightInt16x8 x y) => (VPSRAW128 x y)
-(ShiftAllRightInt16x16 x (MOVQconst [c])) => (VPSRAW256const [uint8(c)] x)
-(ShiftAllRightInt16x16 x y) => (VPSRAW256 x y)
-(ShiftAllRightInt16x32 x (MOVQconst [c])) => (VPSRAW512const [uint8(c)] x)
-(ShiftAllRightInt16x32 x y) => (VPSRAW512 x y)
-(ShiftAllRightInt32x4 x (MOVQconst [c])) => (VPSRAD128const [uint8(c)] x)
-(ShiftAllRightInt32x4 x y) => (VPSRAD128 x y)
-(ShiftAllRightInt32x8 x (MOVQconst [c])) => (VPSRAD256const [uint8(c)] x)
-(ShiftAllRightInt32x8 x y) => (VPSRAD256 x y)
-(ShiftAllRightInt32x16 x (MOVQconst [c])) => (VPSRAD512const [uint8(c)] x)
-(ShiftAllRightInt32x16 x y) => (VPSRAD512 x y)
-(ShiftAllRightInt64x2 x (MOVQconst [c])) => (VPSRAQ128const [uint8(c)] x)
-(ShiftAllRightInt64x2 x y) => (VPSRAQ128 x y)
-(ShiftAllRightInt64x4 x (MOVQconst [c])) => (VPSRAQ256const [uint8(c)] x)
-(ShiftAllRightInt64x4 x y) => (VPSRAQ256 x y)
-(ShiftAllRightInt64x8 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x)
-(ShiftAllRightInt64x8 x y) => (VPSRAQ512 x y)
-(ShiftAllRightUint16x8 x (MOVQconst [c])) => (VPSRLW128const [uint8(c)] x)
-(ShiftAllRightUint16x8 x y) => (VPSRLW128 x y)
-(ShiftAllRightUint16x16 x (MOVQconst [c])) => (VPSRLW256const [uint8(c)] x)
-(ShiftAllRightUint16x16 x y) => (VPSRLW256 x y)
-(ShiftAllRightUint16x32 x (MOVQconst [c])) => (VPSRLW512const [uint8(c)] x)
-(ShiftAllRightUint16x32 x y) => (VPSRLW512 x y)
-(ShiftAllRightUint32x4 x (MOVQconst [c])) => (VPSRLD128const [uint8(c)] x)
-(ShiftAllRightUint32x4 x y) => (VPSRLD128 x y)
-(ShiftAllRightUint32x8 x (MOVQconst [c])) => (VPSRLD256const [uint8(c)] x)
-(ShiftAllRightUint32x8 x y) => (VPSRLD256 x y)
-(ShiftAllRightUint32x16 x (MOVQconst [c])) => (VPSRLD512const [uint8(c)] x)
-(ShiftAllRightUint32x16 x y) => (VPSRLD512 x y)
-(ShiftAllRightUint64x2 x (MOVQconst [c])) => (VPSRLQ128const [uint8(c)] x)
-(ShiftAllRightUint64x2 x y) => (VPSRLQ128 x y)
-(ShiftAllRightUint64x4 x (MOVQconst [c])) => (VPSRLQ256const [uint8(c)] x)
-(ShiftAllRightUint64x4 x y) => (VPSRLQ256 x y)
-(ShiftAllRightUint64x8 x (MOVQconst [c])) => (VPSRLQ512const [uint8(c)] x)
-(ShiftAllRightUint64x8 x y) => (VPSRLQ512 x y)
+(ShiftAllRightInt16x8 ...) => (VPSRAW128 ...)
+(VPSRAW128 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x)
+(ShiftAllRightInt16x16 ...) => (VPSRAW256 ...)
+(VPSRAW256 x (MOVQconst [c])) => (VPSRAW256const [uint8(c)] x)
+(ShiftAllRightInt16x32 ...) => (VPSRAW512 ...)
+(VPSRAW512 x (MOVQconst [c])) => (VPSRAW512const [uint8(c)] x)
+(ShiftAllRightInt32x4 ...) => (VPSRAD128 ...)
+(VPSRAD128 x (MOVQconst [c])) => (VPSRAD128const [uint8(c)] x)
+(ShiftAllRightInt32x8 ...) => (VPSRAD256 ...)
+(VPSRAD256 x (MOVQconst [c])) => (VPSRAD256const [uint8(c)] x)
+(ShiftAllRightInt32x16 ...) => (VPSRAD512 ...)
+(VPSRAD512 x (MOVQconst [c])) => (VPSRAD512const [uint8(c)] x)
+(ShiftAllRightInt64x2 ...) => (VPSRAQ128 ...)
+(VPSRAQ128 x (MOVQconst [c])) => (VPSRAQ128const [uint8(c)] x)
+(ShiftAllRightInt64x4 ...) => (VPSRAQ256 ...)
+(VPSRAQ256 x (MOVQconst [c])) => (VPSRAQ256const [uint8(c)] x)
+(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...)
+(VPSRAQ512 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x)
+(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...)
+(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...)
+(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...)
+(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...)
+(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...)
+(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...)
+(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...)
+(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...)
+(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...)
 (ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...)
 (ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...)
 (ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...)
 (ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPSRAWMasked128 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt16x16 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPSRAWMasked256 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt16x32 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
+(VPSRAWMasked512 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt32x4 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPSRADMasked128 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt32x8 x (MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPSRADMasked256 x (MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt32x16 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(VPSRADMasked512 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt64x2 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt64x4 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedInt64x8 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask)
 (ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x8 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x16 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x32 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x4 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x8 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x16 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x2 x (MOVQconst [c]) mask) => (VPSRLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x4 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x8 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...)
 (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...)
index fbe8a448d8d14aa5608edd9e82bc296b96d57158..c5367adefec4327a0360b959a8dd2c03e04b0399 100644 (file)
@@ -531,6 +531,78 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v)
        case OpAMD64VPMOVVec8x64ToM:
                return rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v)
+       case OpAMD64VPSLLD128:
+               return rewriteValueAMD64_OpAMD64VPSLLD128(v)
+       case OpAMD64VPSLLD256:
+               return rewriteValueAMD64_OpAMD64VPSLLD256(v)
+       case OpAMD64VPSLLD512:
+               return rewriteValueAMD64_OpAMD64VPSLLD512(v)
+       case OpAMD64VPSLLDMasked128:
+               return rewriteValueAMD64_OpAMD64VPSLLDMasked128(v)
+       case OpAMD64VPSLLDMasked256:
+               return rewriteValueAMD64_OpAMD64VPSLLDMasked256(v)
+       case OpAMD64VPSLLDMasked512:
+               return rewriteValueAMD64_OpAMD64VPSLLDMasked512(v)
+       case OpAMD64VPSLLQ128:
+               return rewriteValueAMD64_OpAMD64VPSLLQ128(v)
+       case OpAMD64VPSLLQ256:
+               return rewriteValueAMD64_OpAMD64VPSLLQ256(v)
+       case OpAMD64VPSLLQ512:
+               return rewriteValueAMD64_OpAMD64VPSLLQ512(v)
+       case OpAMD64VPSLLQMasked128:
+               return rewriteValueAMD64_OpAMD64VPSLLQMasked128(v)
+       case OpAMD64VPSLLQMasked256:
+               return rewriteValueAMD64_OpAMD64VPSLLQMasked256(v)
+       case OpAMD64VPSLLQMasked512:
+               return rewriteValueAMD64_OpAMD64VPSLLQMasked512(v)
+       case OpAMD64VPSLLW128:
+               return rewriteValueAMD64_OpAMD64VPSLLW128(v)
+       case OpAMD64VPSLLW256:
+               return rewriteValueAMD64_OpAMD64VPSLLW256(v)
+       case OpAMD64VPSLLW512:
+               return rewriteValueAMD64_OpAMD64VPSLLW512(v)
+       case OpAMD64VPSLLWMasked128:
+               return rewriteValueAMD64_OpAMD64VPSLLWMasked128(v)
+       case OpAMD64VPSLLWMasked256:
+               return rewriteValueAMD64_OpAMD64VPSLLWMasked256(v)
+       case OpAMD64VPSLLWMasked512:
+               return rewriteValueAMD64_OpAMD64VPSLLWMasked512(v)
+       case OpAMD64VPSRAD128:
+               return rewriteValueAMD64_OpAMD64VPSRAD128(v)
+       case OpAMD64VPSRAD256:
+               return rewriteValueAMD64_OpAMD64VPSRAD256(v)
+       case OpAMD64VPSRAD512:
+               return rewriteValueAMD64_OpAMD64VPSRAD512(v)
+       case OpAMD64VPSRADMasked128:
+               return rewriteValueAMD64_OpAMD64VPSRADMasked128(v)
+       case OpAMD64VPSRADMasked256:
+               return rewriteValueAMD64_OpAMD64VPSRADMasked256(v)
+       case OpAMD64VPSRADMasked512:
+               return rewriteValueAMD64_OpAMD64VPSRADMasked512(v)
+       case OpAMD64VPSRAQ128:
+               return rewriteValueAMD64_OpAMD64VPSRAQ128(v)
+       case OpAMD64VPSRAQ256:
+               return rewriteValueAMD64_OpAMD64VPSRAQ256(v)
+       case OpAMD64VPSRAQ512:
+               return rewriteValueAMD64_OpAMD64VPSRAQ512(v)
+       case OpAMD64VPSRAQMasked128:
+               return rewriteValueAMD64_OpAMD64VPSRAQMasked128(v)
+       case OpAMD64VPSRAQMasked256:
+               return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v)
+       case OpAMD64VPSRAQMasked512:
+               return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v)
+       case OpAMD64VPSRAW128:
+               return rewriteValueAMD64_OpAMD64VPSRAW128(v)
+       case OpAMD64VPSRAW256:
+               return rewriteValueAMD64_OpAMD64VPSRAW256(v)
+       case OpAMD64VPSRAW512:
+               return rewriteValueAMD64_OpAMD64VPSRAW512(v)
+       case OpAMD64VPSRAWMasked128:
+               return rewriteValueAMD64_OpAMD64VPSRAWMasked128(v)
+       case OpAMD64VPSRAWMasked256:
+               return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v)
+       case OpAMD64VPSRAWMasked512:
+               return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v)
        case OpAMD64XADDLlock:
                return rewriteValueAMD64_OpAMD64XADDLlock(v)
        case OpAMD64XADDQlock:
@@ -4662,23 +4734,32 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPSHLDQ512
                return true
        case OpShiftAllLeftInt16x16:
-               return rewriteValueAMD64_OpShiftAllLeftInt16x16(v)
+               v.Op = OpAMD64VPSLLW256
+               return true
        case OpShiftAllLeftInt16x32:
-               return rewriteValueAMD64_OpShiftAllLeftInt16x32(v)
+               v.Op = OpAMD64VPSLLW512
+               return true
        case OpShiftAllLeftInt16x8:
-               return rewriteValueAMD64_OpShiftAllLeftInt16x8(v)
+               v.Op = OpAMD64VPSLLW128
+               return true
        case OpShiftAllLeftInt32x16:
-               return rewriteValueAMD64_OpShiftAllLeftInt32x16(v)
+               v.Op = OpAMD64VPSLLD512
+               return true
        case OpShiftAllLeftInt32x4:
-               return rewriteValueAMD64_OpShiftAllLeftInt32x4(v)
+               v.Op = OpAMD64VPSLLD128
+               return true
        case OpShiftAllLeftInt32x8:
-               return rewriteValueAMD64_OpShiftAllLeftInt32x8(v)
+               v.Op = OpAMD64VPSLLD256
+               return true
        case OpShiftAllLeftInt64x2:
-               return rewriteValueAMD64_OpShiftAllLeftInt64x2(v)
+               v.Op = OpAMD64VPSLLQ128
+               return true
        case OpShiftAllLeftInt64x4:
-               return rewriteValueAMD64_OpShiftAllLeftInt64x4(v)
+               v.Op = OpAMD64VPSLLQ256
+               return true
        case OpShiftAllLeftInt64x8:
-               return rewriteValueAMD64_OpShiftAllLeftInt64x8(v)
+               v.Op = OpAMD64VPSLLQ512
+               return true
        case OpShiftAllLeftMaskedInt16x16:
                return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v)
        case OpShiftAllLeftMaskedInt16x32:
@@ -4716,23 +4797,32 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllLeftMaskedUint64x8:
                return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v)
        case OpShiftAllLeftUint16x16:
-               return rewriteValueAMD64_OpShiftAllLeftUint16x16(v)
+               v.Op = OpAMD64VPSLLW256
+               return true
        case OpShiftAllLeftUint16x32:
-               return rewriteValueAMD64_OpShiftAllLeftUint16x32(v)
+               v.Op = OpAMD64VPSLLW512
+               return true
        case OpShiftAllLeftUint16x8:
-               return rewriteValueAMD64_OpShiftAllLeftUint16x8(v)
+               v.Op = OpAMD64VPSLLW128
+               return true
        case OpShiftAllLeftUint32x16:
-               return rewriteValueAMD64_OpShiftAllLeftUint32x16(v)
+               v.Op = OpAMD64VPSLLD512
+               return true
        case OpShiftAllLeftUint32x4:
-               return rewriteValueAMD64_OpShiftAllLeftUint32x4(v)
+               v.Op = OpAMD64VPSLLD128
+               return true
        case OpShiftAllLeftUint32x8:
-               return rewriteValueAMD64_OpShiftAllLeftUint32x8(v)
+               v.Op = OpAMD64VPSLLD256
+               return true
        case OpShiftAllLeftUint64x2:
-               return rewriteValueAMD64_OpShiftAllLeftUint64x2(v)
+               v.Op = OpAMD64VPSLLQ128
+               return true
        case OpShiftAllLeftUint64x4:
-               return rewriteValueAMD64_OpShiftAllLeftUint64x4(v)
+               v.Op = OpAMD64VPSLLQ256
+               return true
        case OpShiftAllLeftUint64x8:
-               return rewriteValueAMD64_OpShiftAllLeftUint64x8(v)
+               v.Op = OpAMD64VPSLLQ512
+               return true
        case OpShiftAllRightConcatInt16x16:
                v.Op = OpAMD64VPSHRDW256
                return true
@@ -4824,23 +4914,32 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPSHRDQ512
                return true
        case OpShiftAllRightInt16x16:
-               return rewriteValueAMD64_OpShiftAllRightInt16x16(v)
+               v.Op = OpAMD64VPSRAW256
+               return true
        case OpShiftAllRightInt16x32:
-               return rewriteValueAMD64_OpShiftAllRightInt16x32(v)
+               v.Op = OpAMD64VPSRAW512
+               return true
        case OpShiftAllRightInt16x8:
-               return rewriteValueAMD64_OpShiftAllRightInt16x8(v)
+               v.Op = OpAMD64VPSRAW128
+               return true
        case OpShiftAllRightInt32x16:
-               return rewriteValueAMD64_OpShiftAllRightInt32x16(v)
+               v.Op = OpAMD64VPSRAD512
+               return true
        case OpShiftAllRightInt32x4:
-               return rewriteValueAMD64_OpShiftAllRightInt32x4(v)
+               v.Op = OpAMD64VPSRAD128
+               return true
        case OpShiftAllRightInt32x8:
-               return rewriteValueAMD64_OpShiftAllRightInt32x8(v)
+               v.Op = OpAMD64VPSRAD256
+               return true
        case OpShiftAllRightInt64x2:
-               return rewriteValueAMD64_OpShiftAllRightInt64x2(v)
+               v.Op = OpAMD64VPSRAQ128
+               return true
        case OpShiftAllRightInt64x4:
-               return rewriteValueAMD64_OpShiftAllRightInt64x4(v)
+               v.Op = OpAMD64VPSRAQ256
+               return true
        case OpShiftAllRightInt64x8:
-               return rewriteValueAMD64_OpShiftAllRightInt64x8(v)
+               v.Op = OpAMD64VPSRAQ512
+               return true
        case OpShiftAllRightMaskedInt16x16:
                return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v)
        case OpShiftAllRightMaskedInt16x32:
@@ -4878,23 +4977,32 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllRightMaskedUint64x8:
                return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v)
        case OpShiftAllRightUint16x16:
-               return rewriteValueAMD64_OpShiftAllRightUint16x16(v)
+               v.Op = OpAMD64VPSRLW256
+               return true
        case OpShiftAllRightUint16x32:
-               return rewriteValueAMD64_OpShiftAllRightUint16x32(v)
+               v.Op = OpAMD64VPSRLW512
+               return true
        case OpShiftAllRightUint16x8:
-               return rewriteValueAMD64_OpShiftAllRightUint16x8(v)
+               v.Op = OpAMD64VPSRLW128
+               return true
        case OpShiftAllRightUint32x16:
-               return rewriteValueAMD64_OpShiftAllRightUint32x16(v)
+               v.Op = OpAMD64VPSRLD512
+               return true
        case OpShiftAllRightUint32x4:
-               return rewriteValueAMD64_OpShiftAllRightUint32x4(v)
+               v.Op = OpAMD64VPSRLD128
+               return true
        case OpShiftAllRightUint32x8:
-               return rewriteValueAMD64_OpShiftAllRightUint32x8(v)
+               v.Op = OpAMD64VPSRLD256
+               return true
        case OpShiftAllRightUint64x2:
-               return rewriteValueAMD64_OpShiftAllRightUint64x2(v)
+               v.Op = OpAMD64VPSRLQ128
+               return true
        case OpShiftAllRightUint64x4:
-               return rewriteValueAMD64_OpShiftAllRightUint64x4(v)
+               v.Op = OpAMD64VPSRLQ256
+               return true
        case OpShiftAllRightUint64x8:
-               return rewriteValueAMD64_OpShiftAllRightUint64x8(v)
+               v.Op = OpAMD64VPSRLQ512
+               return true
        case OpShiftLeftConcatInt16x16:
                v.Op = OpAMD64VPSHLDVW256
                return true
@@ -27713,416 +27821,1100 @@ func rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64VPSLLD128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (XADDLlock [off1+off2] {sym} val ptr mem)
+       // match: (VPSLLD128 x (MOVQconst [c]))
+       // result: (VPSLLD128const [uint8(c)] x)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               ptr := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XADDLlock)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(val, ptr, mem)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XADDQlock(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64VPSLLD256(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (XADDQlock [off1+off2] {sym} val ptr mem)
+       // match: (VPSLLD256 x (MOVQconst [c]))
+       // result: (VPSLLD256const [uint8(c)] x)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               ptr := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XADDQlock)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(val, ptr, mem)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XCHGL(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (XCHGL [off1+off2] {sym} val ptr mem)
+       // match: (VPSLLD512 x (MOVQconst [c]))
+       // result: (VPSLLD512const [uint8(c)] x)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               ptr := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XCHGL)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(val, ptr, mem)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
                return true
        }
-       // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
-       // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLDMasked128 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked128const [uint8(c)] x mask)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               ptr := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XCHGL)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(val, ptr, mem)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XCHGQ(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2))
-       // result: (XCHGQ [off1+off2] {sym} val ptr mem)
+       // match: (VPSLLDMasked256 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked256const [uint8(c)] x mask)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64ADDQconst {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               ptr := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1) + int64(off2))) {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XCHGQ)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(sym)
-               v.AddArg3(val, ptr, mem)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
-       // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
-       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
-       // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLDMasked512 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked512const [uint8(c)] x mask)
        for {
-               off1 := auxIntToInt32(v.AuxInt)
-               sym1 := auxToSym(v.Aux)
-               val := v_0
-               if v_1.Op != OpAMD64LEAQ {
-                       break
-               }
-               off2 := auxIntToInt32(v_1.AuxInt)
-               sym2 := auxToSym(v_1.Aux)
-               ptr := v_1.Args[0]
-               mem := v_2
-               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64XCHGQ)
-               v.AuxInt = int32ToAuxInt(off1 + off2)
-               v.Aux = symToAux(mergeSym(sym1, sym2))
-               v.AddArg3(val, ptr, mem)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (XORL (SHLL (MOVLconst [1]) y) x)
-       // result: (BTCL x y)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       if v_0.Op != OpAMD64SHLL {
-                               continue
-                       }
-                       y := v_0.Args[1]
-                       v_0_0 := v_0.Args[0]
-                       if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
-                               continue
-                       }
-                       x := v_1
-                       v.reset(OpAMD64BTCL)
-                       v.AddArg2(x, y)
-                       return true
-               }
-               break
-       }
-       // match: (XORL x (MOVLconst [c]))
-       // result: (XORLconst [c] x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpAMD64MOVLconst {
-                               continue
-                       }
-                       c := auxIntToInt32(v_1.AuxInt)
-                       v.reset(OpAMD64XORLconst)
-                       v.AuxInt = int32ToAuxInt(c)
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
-       // match: (XORL x x)
-       // result: (MOVLconst [0])
+       // match: (VPSLLQ128 x (MOVQconst [c]))
+       // result: (VPSLLQ128const [uint8(c)] x)
        for {
                x := v_0
-               if x != v_1 {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(0)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
                return true
        }
-       // match: (XORL x l:(MOVLload [off] {sym} ptr mem))
-       // cond: canMergeLoadClobber(v, l, x) && clobber(l)
-       // result: (XORLload x [off] {sym} ptr mem)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64MOVLload {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64XORLload)
-                       v.AuxInt = int32ToAuxInt(off)
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(x, ptr, mem)
-                       return true
-               }
-               break
-       }
-       // match: (XORL x (ADDLconst [-1] x))
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (BLSMSKL x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLQ256(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLQ256 x (MOVQconst [c]))
+       // result: (VPSLLQ256const [uint8(c)] x)
        for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
-                               continue
-                       }
-                       v.reset(OpAMD64BLSMSKL)
-                       v.AddArg(x)
-                       return true
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
                }
-               break
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
+               return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (XORLconst [1] (SETNE x))
-       // result: (SETEQ x)
+       // match: (VPSLLQ512 x (MOVQconst [c]))
+       // result: (VPSLLQ512const [uint8(c)] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETNE {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETEQ)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
                v.AddArg(x)
                return true
        }
-       // match: (XORLconst [1] (SETEQ x))
-       // result: (SETNE x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLQMasked128 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked128const [uint8(c)] x mask)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETEQ {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETNE)
-               v.AddArg(x)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
-       // match: (XORLconst [1] (SETL x))
-       // result: (SETGE x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLQMasked256 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked256const [uint8(c)] x mask)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETL {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETGE)
-               v.AddArg(x)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
-       // match: (XORLconst [1] (SETGE x))
-       // result: (SETL x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLQMasked512 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked512const [uint8(c)] x mask)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETGE {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETL)
-               v.AddArg(x)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
-       // match: (XORLconst [1] (SETLE x))
-       // result: (SETG x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLW128 x (MOVQconst [c]))
+       // result: (VPSLLW128const [uint8(c)] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETLE {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETG)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
                v.AddArg(x)
                return true
        }
-       // match: (XORLconst [1] (SETG x))
-       // result: (SETLE x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLW256(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLW256 x (MOVQconst [c]))
+       // result: (VPSLLW256const [uint8(c)] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETG {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETLE)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
                v.AddArg(x)
                return true
        }
-       // match: (XORLconst [1] (SETB x))
-       // result: (SETAE x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLW512 x (MOVQconst [c]))
+       // result: (VPSLLW512const [uint8(c)] x)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETB {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETAE)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
                v.AddArg(x)
                return true
        }
-       // match: (XORLconst [1] (SETAE x))
-       // result: (SETB x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLWMasked128 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked128const [uint8(c)] x mask)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETAE {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETB)
-               v.AddArg(x)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
-       // match: (XORLconst [1] (SETBE x))
-       // result: (SETA x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLWMasked256 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked256const [uint8(c)] x mask)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETBE {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETA)
-               v.AddArg(x)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
-       // match: (XORLconst [1] (SETA x))
-       // result: (SETBE x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSLLWMasked512 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked512const [uint8(c)] x mask)
        for {
-               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETA {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETBE)
-               v.AddArg(x)
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
                return true
        }
-       // match: (XORLconst [c] (XORLconst [d] x))
-       // result: (XORLconst [c ^ d] x)
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAD128 x (MOVQconst [c]))
+       // result: (VPSRAD128const [uint8(c)] x)
        for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64XORLconst {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               d := auxIntToInt32(v_0.AuxInt)
-               x := v_0.Args[0]
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = int32ToAuxInt(c ^ d)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAD128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
                v.AddArg(x)
                return true
        }
-       // match: (XORLconst [c] x)
-       // cond: c==0
-       // result: x
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAD256 x (MOVQconst [c]))
+       // result: (VPSRAD256const [uint8(c)] x)
        for {
-               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if !(c == 0) {
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               v.copyOf(x)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAD256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
                return true
        }
-       // match: (XORLconst [c] (MOVLconst [d]))
-       // result: (MOVLconst [c^d])
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAD512 x (MOVQconst [c]))
+       // result: (VPSRAD512const [uint8(c)] x)
        for {
-               c := auxIntToInt32(v.AuxInt)
-               if v_0.Op != OpAMD64MOVLconst {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
                        break
                }
-               d := auxIntToInt32(v_0.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(c ^ d)
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAD512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64XORLconstmodify(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
-       // cond: ValAndOff(valoff1).canAdd32(off2)
-       // result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
+       // match: (VPSRADMasked128 x (MOVQconst [c]) mask)
+       // result: (VPSRADMasked128const [uint8(c)] x mask)
        for {
-               valoff1 := auxIntToValAndOff(v.AuxInt)
-               sym := auxToSym(v.Aux)
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRADMasked128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRADMasked256 x (MOVQconst [c]) mask)
+       // result: (VPSRADMasked256const [uint8(c)] x mask)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRADMasked256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRADMasked512 x (MOVQconst [c]) mask)
+       // result: (VPSRADMasked512const [uint8(c)] x mask)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRADMasked512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAQ128 x (MOVQconst [c]))
+       // result: (VPSRAQ128const [uint8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAQ128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAQ256 x (MOVQconst [c]))
+       // result: (VPSRAQ256const [uint8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAQ256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAQ512 x (MOVQconst [c]))
+       // result: (VPSRAQ512const [uint8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAQ512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAQMasked128 x (MOVQconst [c]) mask)
+       // result: (VPSRAQMasked128const [uint8(c)] x mask)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAQMasked128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAQMasked256 x (MOVQconst [c]) mask)
+       // result: (VPSRAQMasked256const [uint8(c)] x mask)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAQMasked256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAQMasked512 x (MOVQconst [c]) mask)
+       // result: (VPSRAQMasked512const [uint8(c)] x mask)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAQMasked512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAW128 x (MOVQconst [c]))
+       // result: (VPSRAW128const [uint8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAW128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAW256 x (MOVQconst [c]))
+       // result: (VPSRAW256const [uint8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAW256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAW512 x (MOVQconst [c]))
+       // result: (VPSRAW512const [uint8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAW512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAWMasked128 x (MOVQconst [c]) mask)
+       // result: (VPSRAWMasked128const [uint8(c)] x mask)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAWMasked128const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAWMasked256 x (MOVQconst [c]) mask)
+       // result: (VPSRAWMasked256const [uint8(c)] x mask)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAWMasked256const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPSRAWMasked512 x (MOVQconst [c]) mask)
+       // result: (VPSRAWMasked512const [uint8(c)] x mask)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAWMasked512const)
+               v.AuxInt = uint8ToAuxInt(uint8(c))
+               v.AddArg2(x, mask)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (XADDLlock [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := auxIntToInt32(v_1.AuxInt)
+               ptr := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
+                       break
+               }
+               v.reset(OpAMD64XADDLlock)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(val, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XADDQlock(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (XADDQlock [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := auxIntToInt32(v_1.AuxInt)
+               ptr := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
+                       break
+               }
+               v.reset(OpAMD64XADDQlock)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(val, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XCHGL(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (XCHGL [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := auxIntToInt32(v_1.AuxInt)
+               ptr := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
+                       break
+               }
+               v.reset(OpAMD64XCHGL)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(val, ptr, mem)
+               return true
+       }
+       // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               ptr := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64XCHGL)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(val, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XCHGQ(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2))
+       // result: (XCHGQ [off1+off2] {sym} val ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64ADDQconst {
+                       break
+               }
+               off2 := auxIntToInt32(v_1.AuxInt)
+               ptr := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1) + int64(off2))) {
+                       break
+               }
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(sym)
+               v.AddArg3(val, ptr, mem)
+               return true
+       }
+       // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+       // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+       // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+       for {
+               off1 := auxIntToInt32(v.AuxInt)
+               sym1 := auxToSym(v.Aux)
+               val := v_0
+               if v_1.Op != OpAMD64LEAQ {
+                       break
+               }
+               off2 := auxIntToInt32(v_1.AuxInt)
+               sym2 := auxToSym(v_1.Aux)
+               ptr := v_1.Args[0]
+               mem := v_2
+               if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+                       break
+               }
+               v.reset(OpAMD64XCHGQ)
+               v.AuxInt = int32ToAuxInt(off1 + off2)
+               v.Aux = symToAux(mergeSym(sym1, sym2))
+               v.AddArg3(val, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (XORL (SHLL (MOVLconst [1]) y) x)
+       // result: (BTCL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpAMD64SHLL {
+                               continue
+                       }
+                       y := v_0.Args[1]
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
+                               continue
+                       }
+                       x := v_1
+                       v.reset(OpAMD64BTCL)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
+       // match: (XORL x (MOVLconst [c]))
+       // result: (XORLconst [c] x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64MOVLconst {
+                               continue
+                       }
+                       c := auxIntToInt32(v_1.AuxInt)
+                       v.reset(OpAMD64XORLconst)
+                       v.AuxInt = int32ToAuxInt(c)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
+       // match: (XORL x x)
+       // result: (MOVLconst [0])
+       for {
+               x := v_0
+               if x != v_1 {
+                       break
+               }
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(0)
+               return true
+       }
+       // match: (XORL x l:(MOVLload [off] {sym} ptr mem))
+       // cond: canMergeLoadClobber(v, l, x) && clobber(l)
+       // result: (XORLload x [off] {sym} ptr mem)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       l := v_1
+                       if l.Op != OpAMD64MOVLload {
+                               continue
+                       }
+                       off := auxIntToInt32(l.AuxInt)
+                       sym := auxToSym(l.Aux)
+                       mem := l.Args[1]
+                       ptr := l.Args[0]
+                       if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
+                               continue
+                       }
+                       v.reset(OpAMD64XORLload)
+                       v.AuxInt = int32ToAuxInt(off)
+                       v.Aux = symToAux(sym)
+                       v.AddArg3(x, ptr, mem)
+                       return true
+               }
+               break
+       }
+       // match: (XORL x (ADDLconst [-1] x))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (BLSMSKL x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64BLSMSKL)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (XORLconst [1] (SETNE x))
+       // result: (SETEQ x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETNE {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETEQ)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETEQ x))
+       // result: (SETNE x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETEQ {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETNE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETL x))
+       // result: (SETGE x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETL {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETGE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETGE x))
+       // result: (SETL x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETGE {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETL)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETLE x))
+       // result: (SETG x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETLE {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETG)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETG x))
+       // result: (SETLE x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETG {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETLE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETB x))
+       // result: (SETAE x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETB {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETAE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETAE x))
+       // result: (SETB x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETAE {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETB)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETBE x))
+       // result: (SETA x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETBE {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETA)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [1] (SETA x))
+       // result: (SETBE x)
+       for {
+               if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETA {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETBE)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [c] (XORLconst [d] x))
+       // result: (XORLconst [c ^ d] x)
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               if v_0.Op != OpAMD64XORLconst {
+                       break
+               }
+               d := auxIntToInt32(v_0.AuxInt)
+               x := v_0.Args[0]
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = int32ToAuxInt(c ^ d)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORLconst [c] x)
+       // cond: c==0
+       // result: x
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               x := v_0
+               if !(c == 0) {
+                       break
+               }
+               v.copyOf(x)
+               return true
+       }
+       // match: (XORLconst [c] (MOVLconst [d]))
+       // result: (MOVLconst [c^d])
+       for {
+               c := auxIntToInt32(v.AuxInt)
+               if v_0.Op != OpAMD64MOVLconst {
+                       break
+               }
+               d := auxIntToInt32(v_0.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(c ^ d)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64XORLconstmodify(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+       // cond: ValAndOff(valoff1).canAdd32(off2)
+       // result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
+       for {
+               valoff1 := auxIntToValAndOff(v.AuxInt)
+               sym := auxToSym(v.Aux)
                if v_0.Op != OpAMD64ADDQconst {
                        break
                }
@@ -51779,2188 +52571,1252 @@ func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        // match: (SetHiFloat32x16 x y)
-       // result: (VINSERTF64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiFloat32x8 x y)
-       // result: (VINSERTF128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiFloat64x4 x y)
-       // result: (VINSERTF128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiFloat64x8 x y)
-       // result: (VINSERTF64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiInt16x16 x y)
-       // result: (VINSERTI128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiInt16x32 x y)
-       // result: (VINSERTI64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiInt32x16 x y)
-       // result: (VINSERTI64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiInt32x8 x y)
-       // result: (VINSERTI128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiInt64x4 x y)
-       // result: (VINSERTI128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiInt64x8 x y)
-       // result: (VINSERTI64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiInt8x32 x y)
-       // result: (VINSERTI128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiInt8x64 x y)
-       // result: (VINSERTI64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiUint16x16 x y)
-       // result: (VINSERTI128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiUint16x32 x y)
-       // result: (VINSERTI64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiUint32x16 x y)
-       // result: (VINSERTI64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiUint32x8 x y)
-       // result: (VINSERTI128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiUint64x4 x y)
-       // result: (VINSERTI128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiUint64x8 x y)
-       // result: (VINSERTI64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiUint8x32 x y)
-       // result: (VINSERTI128256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetHiUint8x64 x y)
-       // result: (VINSERTI64X4512 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoFloat32x16 x y)
-       // result: (VINSERTF64X4512 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoFloat32x8 x y)
-       // result: (VINSERTF128256 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoFloat64x4 x y)
-       // result: (VINSERTF128256 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoFloat64x8 x y)
-       // result: (VINSERTF64X4512 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt16x16 x y)
-       // result: (VINSERTI128256 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt16x32 x y)
-       // result: (VINSERTI64X4512 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt32x16 x y)
-       // result: (VINSERTI64X4512 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt32x8 x y)
-       // result: (VINSERTI128256 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt64x4 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // result: (VINSERTF64X4512 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt64x8 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (SetHiFloat32x8 x y)
+       // result: (VINSERTF128256 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool {
+func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt8x32 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (SetHiFloat64x4 x y)
+       // result: (VINSERTF128256 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt8x64 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (SetHiFloat64x8 x y)
+       // result: (VINSERTF64X4512 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint16x16 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (SetHiInt16x16 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
                x := v_0
                y := v_1
                v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint16x32 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (SetHiInt16x32 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
                x := v_0
                y := v_1
                v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint32x16 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (SetHiInt32x16 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
                x := v_0
                y := v_1
                v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint32x8 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (SetHiInt32x8 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
                x := v_0
                y := v_1
                v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint64x4 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (SetHiInt64x4 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
                x := v_0
                y := v_1
                v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint64x8 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (SetHiInt64x8 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
                x := v_0
                y := v_1
                v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint8x32 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (SetHiInt8x32 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
                x := v_0
                y := v_1
                v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint8x64 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (SetHiInt8x64 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
                x := v_0
                y := v_1
                v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask)
-       // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask)
-       // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask)
-       // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask)
-       // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (SetHiUint16x16 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask)
-       // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (SetHiUint16x32 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask)
-       // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (SetHiUint32x16 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask)
-       // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (SetHiUint32x8 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask)
-       // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (SetHiUint64x4 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask)
-       // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (SetHiUint64x8 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask)
-       // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (SetHiUint8x32 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask)
-       // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (SetHiUint8x64 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask)
-       // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (SetLoFloat32x16 x y)
+       // result: (VINSERTF64X4512 [0] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask)
-       // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (SetLoFloat32x8 x y)
+       // result: (VINSERTF128256 [0] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask)
-       // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (SetLoFloat64x4 x y)
+       // result: (VINSERTF128256 [0] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask)
-       // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (SetLoFloat64x8 x y)
+       // result: (VINSERTF64X4512 [0] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask)
-       // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (SetLoInt16x16 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask)
-       // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (SetLoInt16x32 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask)
-       // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (SetLoInt32x16 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt16x16 x (MOVQconst [c]))
-       // result: (VPSLLW256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLW256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftInt16x16 x y)
-       // result: (VPSLLW256 x y)
+       // match: (SetLoInt32x8 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLW256)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt16x32 x (MOVQconst [c]))
-       // result: (VPSLLW512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLW512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftInt16x32 x y)
-       // result: (VPSLLW512 x y)
+       // match: (SetLoInt64x4 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLW512)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt16x8 x (MOVQconst [c]))
-       // result: (VPSLLW128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLW128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftInt16x8 x y)
-       // result: (VPSLLW128 x y)
+       // match: (SetLoInt64x8 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLW128)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt32x16 x (MOVQconst [c]))
-       // result: (VPSLLD512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLD512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftInt32x16 x y)
-       // result: (VPSLLD512 x y)
+       // match: (SetLoInt8x32 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLD512)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt32x4 x (MOVQconst [c]))
-       // result: (VPSLLD128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLD128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftInt32x4 x y)
-       // result: (VPSLLD128 x y)
+}
+func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt8x64 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLD128)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt32x8 x (MOVQconst [c]))
-       // result: (VPSLLD256const [uint8(c)] x)
+       // match: (SetLoUint16x16 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLD256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (ShiftAllLeftInt32x8 x y)
-       // result: (VPSLLD256 x y)
+}
+func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint16x32 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLD256)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt64x2 x (MOVQconst [c]))
-       // result: (VPSLLQ128const [uint8(c)] x)
+       // match: (SetLoUint32x16 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLQ128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (ShiftAllLeftInt64x2 x y)
-       // result: (VPSLLQ128 x y)
+}
+func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint32x8 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLQ128)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt64x4 x (MOVQconst [c]))
-       // result: (VPSLLQ256const [uint8(c)] x)
+       // match: (SetLoUint64x4 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLQ256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (ShiftAllLeftInt64x4 x y)
-       // result: (VPSLLQ256 x y)
+}
+func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint64x8 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLQ256)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftInt64x8 x (MOVQconst [c]))
-       // result: (VPSLLQ512const [uint8(c)] x)
+       // match: (SetLoUint8x32 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLQ512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (ShiftAllLeftInt64x8 x y)
-       // result: (VPSLLQ512 x y)
+}
+func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint8x64 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLQ512)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x16 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt16x16 x y mask)
-       // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask)
+       // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256)
+               v.reset(OpAMD64VPSHLDWMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x32 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt16x32 x y mask)
-       // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask)
+       // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512)
+               v.reset(OpAMD64VPSHLDWMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt16x8 x y mask)
-       // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask)
+       // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128)
+               v.reset(OpAMD64VPSHLDWMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x16 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt32x16 x y mask)
-       // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask)
+       // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512)
+               v.reset(OpAMD64VPSHLDDMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x4 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt32x4 x y mask)
-       // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask)
+       // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128)
+               v.reset(OpAMD64VPSHLDDMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x8 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt32x8 x y mask)
-       // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask)
+       // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256)
+               v.reset(OpAMD64VPSHLDDMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x2 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt64x2 x y mask)
-       // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask)
+       // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128)
+               v.reset(OpAMD64VPSHLDQMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x4 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt64x4 x y mask)
-       // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask)
+       // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256)
+               v.reset(OpAMD64VPSHLDQMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x8 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedInt64x8 x y mask)
-       // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask)
+       // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512)
+               v.reset(OpAMD64VPSHLDQMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x16 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedUint16x16 x y mask)
-       // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask)
+       // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256)
+               v.reset(OpAMD64VPSHLDWMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x32 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedUint16x32 x y mask)
-       // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask)
+       // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512)
+               v.reset(OpAMD64VPSHLDWMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x8 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedUint16x8 x y mask)
-       // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask)
+       // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128)
+               v.reset(OpAMD64VPSHLDWMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x16 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedUint32x16 x y mask)
-       // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask)
+       // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512)
+               v.reset(OpAMD64VPSHLDDMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x4 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedUint32x4 x y mask)
-       // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask)
+       // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128)
+               v.reset(OpAMD64VPSHLDDMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x8 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedUint32x8 x y mask)
-       // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask)
+       // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256)
+               v.reset(OpAMD64VPSHLDDMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x2 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedUint64x2 x y mask)
-       // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x4 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (ShiftAllLeftMaskedUint64x4 x y mask)
-       // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask)
+       // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHLDQMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x8 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask)
+       // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
+               y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHLDQMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.AddArg3(x, y, v0)
                return true
        }
-       // match: (ShiftAllLeftMaskedUint64x8 x y mask)
-       // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+}
+func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask)
+       // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512)
+               v.reset(OpAMD64VPSHLDQMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint16x16 x (MOVQconst [c]))
-       // result: (VPSLLW256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLW256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint16x16 x y)
-       // result: (VPSLLW256 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt16x16 x y mask)
+       // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLW256)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint16x32 x (MOVQconst [c]))
-       // result: (VPSLLW512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLW512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint16x32 x y)
-       // result: (VPSLLW512 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt16x32 x y mask)
+       // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLW512)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint16x8 x (MOVQconst [c]))
-       // result: (VPSLLW128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLW128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint16x8 x y)
-       // result: (VPSLLW128 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt16x8 x y mask)
+       // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLW128)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint32x16 x (MOVQconst [c]))
-       // result: (VPSLLD512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLD512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint32x16 x y)
-       // result: (VPSLLD512 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt32x16 x y mask)
+       // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLD512)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint32x4 x (MOVQconst [c]))
-       // result: (VPSLLD128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLD128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint32x4 x y)
-       // result: (VPSLLD128 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt32x4 x y mask)
+       // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLD128)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint32x8 x (MOVQconst [c]))
-       // result: (VPSLLD256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLD256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint32x8 x y)
-       // result: (VPSLLD256 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt32x8 x y mask)
+       // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLD256)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint64x2 x (MOVQconst [c]))
-       // result: (VPSLLQ128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLQ128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint64x2 x y)
-       // result: (VPSLLQ128 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt64x2 x y mask)
+       // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLQ128)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint64x4 x (MOVQconst [c]))
-       // result: (VPSLLQ256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLQ256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint64x4 x y)
-       // result: (VPSLLQ256 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt64x4 x y mask)
+       // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLQ256)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllLeftUint64x8 x (MOVQconst [c]))
-       // result: (VPSLLQ512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSLLQ512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllLeftUint64x8 x y)
-       // result: (VPSLLQ512 x y)
+       b := v.Block
+       // match: (ShiftAllLeftMaskedInt64x8 x y mask)
+       // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSLLQ512)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask)
-       // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint16x16 x y mask)
+       // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLWMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask)
-       // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint16x32 x y mask)
+       // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLWMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask)
-       // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint16x8 x y mask)
+       // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLWMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask)
-       // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint32x16 x y mask)
+       // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLDMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask)
-       // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint32x4 x y mask)
+       // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLDMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask)
-       // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint32x8 x y mask)
+       // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLDMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask)
-       // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint64x2 x y mask)
+       // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLQMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask)
-       // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint64x4 x y mask)
+       // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLQMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask)
-       // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftMaskedUint64x8 x y mask)
+       // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSLLQMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask)
        // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -53975,12 +53831,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask)
        // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -53995,12 +53851,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask)
        // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -54015,12 +53871,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask)
        // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -54035,12 +53891,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask)
        // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -54055,12 +53911,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask)
        // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -54075,12 +53931,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask)
        // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -54095,12 +53951,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x4 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask)
        // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -54115,12 +53971,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask)
+       // match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask)
        // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                a := auxIntToUint8(v.AuxInt)
@@ -54135,261 +53991,191 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightInt16x16 x (MOVQconst [c]))
-       // result: (VPSRAW256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAW256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightInt16x16 x y)
-       // result: (VPSRAW256 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRAW256)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightInt16x32 x (MOVQconst [c]))
-       // result: (VPSRAW512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAW512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightInt16x32 x y)
-       // result: (VPSRAW512 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRAW512)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllRightInt16x8 x (MOVQconst [c]))
-       // result: (VPSRAW128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAW128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightInt16x8 x y)
-       // result: (VPSRAW128 x y)
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask)
+       // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSRAW128)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSHRDWMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllRightInt32x16 x (MOVQconst [c]))
-       // result: (VPSRAD512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAD512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightInt32x16 x y)
-       // result: (VPSRAD512 x y)
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask)
+       // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSRAD512)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSHRDWMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllRightInt32x4 x (MOVQconst [c]))
-       // result: (VPSRAD128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAD128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightInt32x4 x y)
-       // result: (VPSRAD128 x y)
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask)
+       // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSRAD128)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSHRDWMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllRightInt32x8 x (MOVQconst [c]))
-       // result: (VPSRAD256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAD256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightInt32x8 x y)
-       // result: (VPSRAD256 x y)
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask)
+       // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSRAD256)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSHRDDMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllRightInt64x2 x (MOVQconst [c]))
-       // result: (VPSRAQ128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAQ128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightInt64x2 x y)
-       // result: (VPSRAQ128 x y)
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask)
+       // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSRAQ128)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSHRDDMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllRightInt64x4 x (MOVQconst [c]))
-       // result: (VPSRAQ256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAQ256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightInt64x4 x y)
-       // result: (VPSRAQ256 x y)
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask)
+       // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSRAQ256)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSHRDDMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (ShiftAllRightInt64x8 x (MOVQconst [c]))
-       // result: (VPSRAQ512const [uint8(c)] x)
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask)
+       // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRAQ512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
+               y := v_1
+               mask := v_2
+               v.reset(OpAMD64VPSHRDQMasked128)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
-       // match: (ShiftAllRightInt64x8 x y)
-       // result: (VPSRAQ512 x y)
+}
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedUint64x4 [a] x y mask)
+       // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPSRAQ512)
-               v.AddArg2(x, y)
+               mask := v_2
+               v.reset(OpAMD64VPSHRDQMasked256)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x16 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked256const [uint8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask)
+       // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
+               y := v_1
                mask := v_2
-               v.reset(OpAMD64VPSRAWMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHRDQMasked512)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.AddArg3(x, y, v0)
                return true
        }
+}
+func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
        // match: (ShiftAllRightMaskedInt16x16 x y mask)
        // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
@@ -54408,22 +54194,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x32 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked512const [uint8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedInt16x32 x y mask)
        // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
@@ -54442,22 +54212,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x8 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked128const [uint8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedInt16x8 x y mask)
        // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
@@ -54476,22 +54230,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x16 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked512const [uint8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedInt32x16 x y mask)
        // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
@@ -54510,22 +54248,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x4 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked128const [uint8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedInt32x4 x y mask)
        // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
@@ -54544,22 +54266,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x8 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked256const [uint8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedInt32x8 x y mask)
        // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
@@ -54578,22 +54284,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt64x2 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked128const [uint8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedInt64x2 x y mask)
        // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
@@ -54612,22 +54302,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt64x4 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked256const [uint8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedInt64x4 x y mask)
        // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
@@ -54646,22 +54320,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt64x8 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked512const [uint8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedInt64x8 x y mask)
        // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
@@ -54680,22 +54338,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint16x16 x (MOVQconst [c]) mask)
-       // result: (VPSRLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint16x16 x y mask)
        // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
@@ -54714,22 +54356,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint16x32 x (MOVQconst [c]) mask)
-       // result: (VPSRLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint16x32 x y mask)
        // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
@@ -54748,22 +54374,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint16x8 x (MOVQconst [c]) mask)
-       // result: (VPSRLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint16x8 x y mask)
        // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
@@ -54782,22 +54392,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint32x16 x (MOVQconst [c]) mask)
-       // result: (VPSRLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint32x16 x y mask)
        // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
@@ -54816,22 +54410,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint32x4 x (MOVQconst [c]) mask)
-       // result: (VPSRLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint32x4 x y mask)
        // result: (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
@@ -54850,22 +54428,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint32x8 x (MOVQconst [c]) mask)
-       // result: (VPSRLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint32x8 x y mask)
        // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
@@ -54884,22 +54446,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint64x2 x (MOVQconst [c]) mask)
-       // result: (VPSRLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint64x2 x y mask)
        // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
@@ -54918,22 +54464,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint64x4 x (MOVQconst [c]) mask)
-       // result: (VPSRLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint64x4 x y mask)
        // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
@@ -54952,22 +54482,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedUint64x8 x (MOVQconst [c]) mask)
-       // result: (VPSRLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
        // match: (ShiftAllRightMaskedUint64x8 x y mask)
        // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
@@ -54981,240 +54495,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint16x16 x (MOVQconst [c]))
-       // result: (VPSRLW256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLW256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint16x16 x y)
-       // result: (VPSRLW256 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLW256)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint16x32 x (MOVQconst [c]))
-       // result: (VPSRLW512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLW512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint16x32 x y)
-       // result: (VPSRLW512 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLW512)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint16x8 x (MOVQconst [c]))
-       // result: (VPSRLW128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLW128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint16x8 x y)
-       // result: (VPSRLW128 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLW128)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint32x16 x (MOVQconst [c]))
-       // result: (VPSRLD512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLD512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint32x16 x y)
-       // result: (VPSRLD512 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLD512)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint32x4 x (MOVQconst [c]))
-       // result: (VPSRLD128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLD128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint32x4 x y)
-       // result: (VPSRLD128 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLD128)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint32x8 x (MOVQconst [c]))
-       // result: (VPSRLD256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLD256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint32x8 x y)
-       // result: (VPSRLD256 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLD256)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint64x2 x (MOVQconst [c]))
-       // result: (VPSRLQ128const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLQ128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint64x2 x y)
-       // result: (VPSRLQ128 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLQ128)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint64x4 x (MOVQconst [c]))
-       // result: (VPSRLQ256const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLQ256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint64x4 x y)
-       // result: (VPSRLQ256 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLQ256)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpShiftAllRightUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (ShiftAllRightUint64x8 x (MOVQconst [c]))
-       // result: (VPSRLQ512const [uint8(c)] x)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               v.reset(OpAMD64VPSRLQ512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg(x)
-               return true
-       }
-       // match: (ShiftAllRightUint64x8 x y)
-       // result: (VPSRLQ512 x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPSRLQ512)
-               v.AddArg2(x, y)
-               return true
-       }
-}
 func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool {
        v_3 := v.Args[3]
        v_2 := v.Args[2]