Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: add ShiftAll immediate variant
author: Junyang Shao <shaojunyang@google.com>
Tue, 5 Aug 2025 04:28:44 +0000 (04:28 +0000)
committer: Junyang Shao <shaojunyang@google.com>
Tue, 5 Aug 2025 15:37:44 +0000 (08:37 -0700)
This CL is generated by CL 693136.

Change-Id: Ifd2278d3f927efa008a14cc5e592e7c14b7120ff
Reviewed-on: https://go-review.googlesource.com/c/go/+/693157
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/simd/simd_test.go

index 76ef42576d32c31f4aba74d7cda9a8eb475c26d7..bd6af6221d54c4f46f0f017ed1b83cd3321e0613 100644 (file)
@@ -689,7 +689,34 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPRORD512,
                ssa.OpAMD64VPRORQ128,
                ssa.OpAMD64VPRORQ256,
-               ssa.OpAMD64VPRORQ512:
+               ssa.OpAMD64VPRORQ512,
+               ssa.OpAMD64VPSLLW128const,
+               ssa.OpAMD64VPSLLW256const,
+               ssa.OpAMD64VPSLLW512const,
+               ssa.OpAMD64VPSLLD128const,
+               ssa.OpAMD64VPSLLD256const,
+               ssa.OpAMD64VPSLLD512const,
+               ssa.OpAMD64VPSLLQ128const,
+               ssa.OpAMD64VPSLLQ256const,
+               ssa.OpAMD64VPSLLQ512const,
+               ssa.OpAMD64VPSRLW128const,
+               ssa.OpAMD64VPSRLW256const,
+               ssa.OpAMD64VPSRLW512const,
+               ssa.OpAMD64VPSRLD128const,
+               ssa.OpAMD64VPSRLD256const,
+               ssa.OpAMD64VPSRLD512const,
+               ssa.OpAMD64VPSRLQ128const,
+               ssa.OpAMD64VPSRLQ256const,
+               ssa.OpAMD64VPSRLQ512const,
+               ssa.OpAMD64VPSRAW128const,
+               ssa.OpAMD64VPSRAW256const,
+               ssa.OpAMD64VPSRAW512const,
+               ssa.OpAMD64VPSRAD128const,
+               ssa.OpAMD64VPSRAD256const,
+               ssa.OpAMD64VPSRAD512const,
+               ssa.OpAMD64VPSRAQ128const,
+               ssa.OpAMD64VPSRAQ256const,
+               ssa.OpAMD64VPSRAQ512const:
                p = simdV11Imm8(s, v)
 
        case ssa.OpAMD64VRNDSCALEPSMasked128,
@@ -715,7 +742,34 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPRORDMasked512,
                ssa.OpAMD64VPRORQMasked128,
                ssa.OpAMD64VPRORQMasked256,
-               ssa.OpAMD64VPRORQMasked512:
+               ssa.OpAMD64VPRORQMasked512,
+               ssa.OpAMD64VPSLLWMasked128const,
+               ssa.OpAMD64VPSLLWMasked256const,
+               ssa.OpAMD64VPSLLWMasked512const,
+               ssa.OpAMD64VPSLLDMasked128const,
+               ssa.OpAMD64VPSLLDMasked256const,
+               ssa.OpAMD64VPSLLDMasked512const,
+               ssa.OpAMD64VPSLLQMasked128const,
+               ssa.OpAMD64VPSLLQMasked256const,
+               ssa.OpAMD64VPSLLQMasked512const,
+               ssa.OpAMD64VPSRLWMasked128const,
+               ssa.OpAMD64VPSRLWMasked256const,
+               ssa.OpAMD64VPSRLWMasked512const,
+               ssa.OpAMD64VPSRLDMasked128const,
+               ssa.OpAMD64VPSRLDMasked256const,
+               ssa.OpAMD64VPSRLDMasked512const,
+               ssa.OpAMD64VPSRLQMasked128const,
+               ssa.OpAMD64VPSRLQMasked256const,
+               ssa.OpAMD64VPSRLQMasked512const,
+               ssa.OpAMD64VPSRAWMasked128const,
+               ssa.OpAMD64VPSRAWMasked256const,
+               ssa.OpAMD64VPSRAWMasked512const,
+               ssa.OpAMD64VPSRADMasked128const,
+               ssa.OpAMD64VPSRADMasked256const,
+               ssa.OpAMD64VPSRADMasked512const,
+               ssa.OpAMD64VPSRAQMasked128const,
+               ssa.OpAMD64VPSRAQMasked256const,
+               ssa.OpAMD64VPSRAQMasked512const:
                p = simdVkvImm8(s, v)
 
        case ssa.OpAMD64VDPPS128,
@@ -1497,7 +1551,34 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPXORDMasked512,
                ssa.OpAMD64VPXORQMasked128,
                ssa.OpAMD64VPXORQMasked256,
-               ssa.OpAMD64VPXORQMasked512:
+               ssa.OpAMD64VPXORQMasked512,
+               ssa.OpAMD64VPSLLWMasked128const,
+               ssa.OpAMD64VPSLLWMasked256const,
+               ssa.OpAMD64VPSLLWMasked512const,
+               ssa.OpAMD64VPSLLDMasked128const,
+               ssa.OpAMD64VPSLLDMasked256const,
+               ssa.OpAMD64VPSLLDMasked512const,
+               ssa.OpAMD64VPSLLQMasked128const,
+               ssa.OpAMD64VPSLLQMasked256const,
+               ssa.OpAMD64VPSLLQMasked512const,
+               ssa.OpAMD64VPSRLWMasked128const,
+               ssa.OpAMD64VPSRLWMasked256const,
+               ssa.OpAMD64VPSRLWMasked512const,
+               ssa.OpAMD64VPSRLDMasked128const,
+               ssa.OpAMD64VPSRLDMasked256const,
+               ssa.OpAMD64VPSRLDMasked512const,
+               ssa.OpAMD64VPSRLQMasked128const,
+               ssa.OpAMD64VPSRLQMasked256const,
+               ssa.OpAMD64VPSRLQMasked512const,
+               ssa.OpAMD64VPSRAWMasked128const,
+               ssa.OpAMD64VPSRAWMasked256const,
+               ssa.OpAMD64VPSRAWMasked512const,
+               ssa.OpAMD64VPSRADMasked128const,
+               ssa.OpAMD64VPSRADMasked256const,
+               ssa.OpAMD64VPSRADMasked512const,
+               ssa.OpAMD64VPSRAQMasked128const,
+               ssa.OpAMD64VPSRAQMasked256const,
+               ssa.OpAMD64VPSRAQMasked512const:
                x86.ParseSuffix(p, "Z")
        }
 
index 060f220c7de758b3d4dee652dcd237794eb56e78..b8bd0d9b4cae7ba0c4539c23100a114e6c3caabc 100644 (file)
 (SetElemUint16x8 ...) => (VPINSRW128 ...)
 (SetElemUint32x4 ...) => (VPINSRD128 ...)
 (SetElemUint64x2 ...) => (VPINSRQ128 ...)
-(ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...)
-(ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...)
-(ShiftAllLeftInt16x32 ...) => (VPSLLW512 ...)
-(ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...)
-(ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...)
-(ShiftAllLeftInt32x16 ...) => (VPSLLD512 ...)
-(ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...)
-(ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...)
-(ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...)
-(ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...)
-(ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...)
-(ShiftAllLeftUint16x32 ...) => (VPSLLW512 ...)
-(ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...)
-(ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...)
-(ShiftAllLeftUint32x16 ...) => (VPSLLD512 ...)
-(ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...)
-(ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...)
-(ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...)
+(ShiftAllLeftInt16x8 x (MOVQconst [c])) => (VPSLLW128const [int8(c)] x)
+(ShiftAllLeftInt16x8 x y) => (VPSLLW128 x y)
+(ShiftAllLeftInt16x16 x (MOVQconst [c])) => (VPSLLW256const [int8(c)] x)
+(ShiftAllLeftInt16x16 x y) => (VPSLLW256 x y)
+(ShiftAllLeftInt16x32 x (MOVQconst [c])) => (VPSLLW512const [int8(c)] x)
+(ShiftAllLeftInt16x32 x y) => (VPSLLW512 x y)
+(ShiftAllLeftInt32x4 x (MOVQconst [c])) => (VPSLLD128const [int8(c)] x)
+(ShiftAllLeftInt32x4 x y) => (VPSLLD128 x y)
+(ShiftAllLeftInt32x8 x (MOVQconst [c])) => (VPSLLD256const [int8(c)] x)
+(ShiftAllLeftInt32x8 x y) => (VPSLLD256 x y)
+(ShiftAllLeftInt32x16 x (MOVQconst [c])) => (VPSLLD512const [int8(c)] x)
+(ShiftAllLeftInt32x16 x y) => (VPSLLD512 x y)
+(ShiftAllLeftInt64x2 x (MOVQconst [c])) => (VPSLLQ128const [int8(c)] x)
+(ShiftAllLeftInt64x2 x y) => (VPSLLQ128 x y)
+(ShiftAllLeftInt64x4 x (MOVQconst [c])) => (VPSLLQ256const [int8(c)] x)
+(ShiftAllLeftInt64x4 x y) => (VPSLLQ256 x y)
+(ShiftAllLeftInt64x8 x (MOVQconst [c])) => (VPSLLQ512const [int8(c)] x)
+(ShiftAllLeftInt64x8 x y) => (VPSLLQ512 x y)
+(ShiftAllLeftUint16x8 x (MOVQconst [c])) => (VPSLLW128const [int8(c)] x)
+(ShiftAllLeftUint16x8 x y) => (VPSLLW128 x y)
+(ShiftAllLeftUint16x16 x (MOVQconst [c])) => (VPSLLW256const [int8(c)] x)
+(ShiftAllLeftUint16x16 x y) => (VPSLLW256 x y)
+(ShiftAllLeftUint16x32 x (MOVQconst [c])) => (VPSLLW512const [int8(c)] x)
+(ShiftAllLeftUint16x32 x y) => (VPSLLW512 x y)
+(ShiftAllLeftUint32x4 x (MOVQconst [c])) => (VPSLLD128const [int8(c)] x)
+(ShiftAllLeftUint32x4 x y) => (VPSLLD128 x y)
+(ShiftAllLeftUint32x8 x (MOVQconst [c])) => (VPSLLD256const [int8(c)] x)
+(ShiftAllLeftUint32x8 x y) => (VPSLLD256 x y)
+(ShiftAllLeftUint32x16 x (MOVQconst [c])) => (VPSLLD512const [int8(c)] x)
+(ShiftAllLeftUint32x16 x y) => (VPSLLD512 x y)
+(ShiftAllLeftUint64x2 x (MOVQconst [c])) => (VPSLLQ128const [int8(c)] x)
+(ShiftAllLeftUint64x2 x y) => (VPSLLQ128 x y)
+(ShiftAllLeftUint64x4 x (MOVQconst [c])) => (VPSLLQ256const [int8(c)] x)
+(ShiftAllLeftUint64x4 x y) => (VPSLLQ256 x y)
+(ShiftAllLeftUint64x8 x (MOVQconst [c])) => (VPSLLQ512const [int8(c)] x)
+(ShiftAllLeftUint64x8 x y) => (VPSLLQ512 x y)
 (ShiftAllLeftConcatInt16x8 ...) => (VPSHLDW128 ...)
 (ShiftAllLeftConcatInt16x16 ...) => (VPSHLDW256 ...)
 (ShiftAllLeftConcatInt16x32 ...) => (VPSHLDW512 ...)
 (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt16x16 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [int8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt16x32 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [int8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt32x4 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [int8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt32x8 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [int8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt32x16 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [int8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt64x2 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [int8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt64x4 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [int8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedInt64x8 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [int8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint16x16 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [int8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint16x32 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [int8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint32x4 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [int8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint32x8 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [int8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint32x16 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [int8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint64x2 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [int8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint64x4 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [int8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ShiftAllLeftMaskedUint64x8 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [int8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightInt16x8 ...) => (VPSRAW128 ...)
-(ShiftAllRightInt16x16 ...) => (VPSRAW256 ...)
-(ShiftAllRightInt16x32 ...) => (VPSRAW512 ...)
-(ShiftAllRightInt32x4 ...) => (VPSRAD128 ...)
-(ShiftAllRightInt32x8 ...) => (VPSRAD256 ...)
-(ShiftAllRightInt32x16 ...) => (VPSRAD512 ...)
-(ShiftAllRightInt64x2 ...) => (VPSRAQ128 ...)
-(ShiftAllRightInt64x4 ...) => (VPSRAQ256 ...)
-(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...)
-(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...)
-(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...)
-(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...)
-(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...)
-(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...)
-(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...)
-(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...)
-(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...)
-(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...)
+(ShiftAllRightInt16x8 x (MOVQconst [c])) => (VPSRAW128const [int8(c)] x)
+(ShiftAllRightInt16x8 x y) => (VPSRAW128 x y)
+(ShiftAllRightInt16x16 x (MOVQconst [c])) => (VPSRAW256const [int8(c)] x)
+(ShiftAllRightInt16x16 x y) => (VPSRAW256 x y)
+(ShiftAllRightInt16x32 x (MOVQconst [c])) => (VPSRAW512const [int8(c)] x)
+(ShiftAllRightInt16x32 x y) => (VPSRAW512 x y)
+(ShiftAllRightInt32x4 x (MOVQconst [c])) => (VPSRAD128const [int8(c)] x)
+(ShiftAllRightInt32x4 x y) => (VPSRAD128 x y)
+(ShiftAllRightInt32x8 x (MOVQconst [c])) => (VPSRAD256const [int8(c)] x)
+(ShiftAllRightInt32x8 x y) => (VPSRAD256 x y)
+(ShiftAllRightInt32x16 x (MOVQconst [c])) => (VPSRAD512const [int8(c)] x)
+(ShiftAllRightInt32x16 x y) => (VPSRAD512 x y)
+(ShiftAllRightInt64x2 x (MOVQconst [c])) => (VPSRAQ128const [int8(c)] x)
+(ShiftAllRightInt64x2 x y) => (VPSRAQ128 x y)
+(ShiftAllRightInt64x4 x (MOVQconst [c])) => (VPSRAQ256const [int8(c)] x)
+(ShiftAllRightInt64x4 x y) => (VPSRAQ256 x y)
+(ShiftAllRightInt64x8 x (MOVQconst [c])) => (VPSRAQ512const [int8(c)] x)
+(ShiftAllRightInt64x8 x y) => (VPSRAQ512 x y)
+(ShiftAllRightUint16x8 x (MOVQconst [c])) => (VPSRLW128const [int8(c)] x)
+(ShiftAllRightUint16x8 x y) => (VPSRLW128 x y)
+(ShiftAllRightUint16x16 x (MOVQconst [c])) => (VPSRLW256const [int8(c)] x)
+(ShiftAllRightUint16x16 x y) => (VPSRLW256 x y)
+(ShiftAllRightUint16x32 x (MOVQconst [c])) => (VPSRLW512const [int8(c)] x)
+(ShiftAllRightUint16x32 x y) => (VPSRLW512 x y)
+(ShiftAllRightUint32x4 x (MOVQconst [c])) => (VPSRLD128const [int8(c)] x)
+(ShiftAllRightUint32x4 x y) => (VPSRLD128 x y)
+(ShiftAllRightUint32x8 x (MOVQconst [c])) => (VPSRLD256const [int8(c)] x)
+(ShiftAllRightUint32x8 x y) => (VPSRLD256 x y)
+(ShiftAllRightUint32x16 x (MOVQconst [c])) => (VPSRLD512const [int8(c)] x)
+(ShiftAllRightUint32x16 x y) => (VPSRLD512 x y)
+(ShiftAllRightUint64x2 x (MOVQconst [c])) => (VPSRLQ128const [int8(c)] x)
+(ShiftAllRightUint64x2 x y) => (VPSRLQ128 x y)
+(ShiftAllRightUint64x4 x (MOVQconst [c])) => (VPSRLQ256const [int8(c)] x)
+(ShiftAllRightUint64x4 x y) => (VPSRLQ256 x y)
+(ShiftAllRightUint64x8 x (MOVQconst [c])) => (VPSRLQ512const [int8(c)] x)
+(ShiftAllRightUint64x8 x y) => (VPSRLQ512 x y)
 (ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...)
 (ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...)
 (ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...)
 (ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt16x16 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [int8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt16x32 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [int8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt32x4 x (MOVQconst [c]) mask) => (VPSRADMasked128const [int8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt32x8 x (MOVQconst [c]) mask) => (VPSRADMasked256const [int8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt32x16 x (MOVQconst [c]) mask) => (VPSRADMasked512const [int8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt64x2 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [int8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt64x4 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [int8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedInt64x8 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [int8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint16x8 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint16x16 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [int8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint16x32 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [int8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint32x4 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [int8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint32x8 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [int8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint32x16 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [int8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint64x2 x (MOVQconst [c]) mask) => (VPSRLQMasked128const [int8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint64x4 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [int8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(ShiftAllRightMaskedUint64x8 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [int8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...)
 (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...)
index adb6dd968f581dd85fa5d14bdb8a8d672a94d1fd..8b7a7791bc3cfc3592f9bfa984c30367b101eb2f 100644 (file)
@@ -1002,5 +1002,59 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPSHRDQMasked128", argLength: 3, reg: w2kw, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPSHRDQMasked256", argLength: 3, reg: w2kw, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPSHRDQMasked512", argLength: 3, reg: w2kw, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSLLW128const", argLength: 1, reg: v11, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSLLW256const", argLength: 1, reg: v11, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSLLW512const", argLength: 1, reg: w11, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSLLD128const", argLength: 1, reg: v11, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSLLD256const", argLength: 1, reg: v11, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSLLD512const", argLength: 1, reg: w11, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSLLQ128const", argLength: 1, reg: v11, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSLLQ256const", argLength: 1, reg: v11, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSLLQ512const", argLength: 1, reg: w11, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSLLWMasked128const", argLength: 2, reg: wkw, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSLLWMasked256const", argLength: 2, reg: wkw, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSLLWMasked512const", argLength: 2, reg: wkw, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSLLDMasked128const", argLength: 2, reg: wkw, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSLLDMasked256const", argLength: 2, reg: wkw, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSLLDMasked512const", argLength: 2, reg: wkw, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSLLQMasked128const", argLength: 2, reg: wkw, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSLLQMasked256const", argLength: 2, reg: wkw, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSLLQMasked512const", argLength: 2, reg: wkw, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRLW128const", argLength: 1, reg: v11, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRLW256const", argLength: 1, reg: v11, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRLW512const", argLength: 1, reg: w11, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRLD128const", argLength: 1, reg: v11, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRLD256const", argLength: 1, reg: v11, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRLD512const", argLength: 1, reg: w11, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRLQ128const", argLength: 1, reg: v11, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRLQ256const", argLength: 1, reg: v11, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRLQ512const", argLength: 1, reg: w11, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRAW128const", argLength: 1, reg: v11, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRAW256const", argLength: 1, reg: v11, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRAW512const", argLength: 1, reg: w11, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRAD128const", argLength: 1, reg: v11, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRAD256const", argLength: 1, reg: v11, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRAD512const", argLength: 1, reg: w11, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRAQ128const", argLength: 1, reg: w11, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRAQ256const", argLength: 1, reg: w11, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRAQ512const", argLength: 1, reg: w11, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRLWMasked128const", argLength: 2, reg: wkw, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRLWMasked256const", argLength: 2, reg: wkw, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRLWMasked512const", argLength: 2, reg: wkw, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRLDMasked128const", argLength: 2, reg: wkw, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRLDMasked256const", argLength: 2, reg: wkw, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRLDMasked512const", argLength: 2, reg: wkw, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRLQMasked128const", argLength: 2, reg: wkw, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRLQMasked256const", argLength: 2, reg: wkw, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRLQMasked512const", argLength: 2, reg: wkw, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRAWMasked128const", argLength: 2, reg: wkw, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRAWMasked256const", argLength: 2, reg: wkw, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRAWMasked512const", argLength: 2, reg: wkw, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRADMasked128const", argLength: 2, reg: wkw, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRADMasked256const", argLength: 2, reg: wkw, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRADMasked512const", argLength: 2, reg: wkw, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPSRAQMasked128const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPSRAQMasked256const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPSRAQMasked512const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
        }
 }
index a69612f28a1aa37413595693da91837d60902227..15fcabbb8d347673ffc4aba10a132ff7c9ed3e5c 100644 (file)
@@ -2221,6 +2221,60 @@ const (
        OpAMD64VPSHRDQMasked128
        OpAMD64VPSHRDQMasked256
        OpAMD64VPSHRDQMasked512
+       OpAMD64VPSLLW128const
+       OpAMD64VPSLLW256const
+       OpAMD64VPSLLW512const
+       OpAMD64VPSLLD128const
+       OpAMD64VPSLLD256const
+       OpAMD64VPSLLD512const
+       OpAMD64VPSLLQ128const
+       OpAMD64VPSLLQ256const
+       OpAMD64VPSLLQ512const
+       OpAMD64VPSLLWMasked128const
+       OpAMD64VPSLLWMasked256const
+       OpAMD64VPSLLWMasked512const
+       OpAMD64VPSLLDMasked128const
+       OpAMD64VPSLLDMasked256const
+       OpAMD64VPSLLDMasked512const
+       OpAMD64VPSLLQMasked128const
+       OpAMD64VPSLLQMasked256const
+       OpAMD64VPSLLQMasked512const
+       OpAMD64VPSRLW128const
+       OpAMD64VPSRLW256const
+       OpAMD64VPSRLW512const
+       OpAMD64VPSRLD128const
+       OpAMD64VPSRLD256const
+       OpAMD64VPSRLD512const
+       OpAMD64VPSRLQ128const
+       OpAMD64VPSRLQ256const
+       OpAMD64VPSRLQ512const
+       OpAMD64VPSRAW128const
+       OpAMD64VPSRAW256const
+       OpAMD64VPSRAW512const
+       OpAMD64VPSRAD128const
+       OpAMD64VPSRAD256const
+       OpAMD64VPSRAD512const
+       OpAMD64VPSRAQ128const
+       OpAMD64VPSRAQ256const
+       OpAMD64VPSRAQ512const
+       OpAMD64VPSRLWMasked128const
+       OpAMD64VPSRLWMasked256const
+       OpAMD64VPSRLWMasked512const
+       OpAMD64VPSRLDMasked128const
+       OpAMD64VPSRLDMasked256const
+       OpAMD64VPSRLDMasked512const
+       OpAMD64VPSRLQMasked128const
+       OpAMD64VPSRLQMasked256const
+       OpAMD64VPSRLQMasked512const
+       OpAMD64VPSRAWMasked128const
+       OpAMD64VPSRAWMasked256const
+       OpAMD64VPSRAWMasked512const
+       OpAMD64VPSRADMasked128const
+       OpAMD64VPSRADMasked256const
+       OpAMD64VPSRADMasked512const
+       OpAMD64VPSRAQMasked128const
+       OpAMD64VPSRAQMasked256const
+       OpAMD64VPSRAQMasked512const
 
        OpARMADD
        OpARMADDconst
@@ -34317,6 +34371,789 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPSLLW128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLW256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLW512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSLLD128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLD256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLD512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSLLQ128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLQ256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLQ512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSLLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSLLWMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLWMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLWMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLDMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLDMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLDMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLQMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLQMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSLLQMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSLLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLW128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLW256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLW512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSRLD128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLD256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLD512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSRLQ128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLQ256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLQ512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSRAW128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAW256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAW512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSRAD128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAD256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAD512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSRAQ128const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSRAQ256const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSRAQ512const",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPSRAQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VPSRLWMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLWMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLWMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLDMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLDMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLDMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLQMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLQMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRLQMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRLQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAWMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAWMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAWMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRADMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRADMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRADMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAQMasked128const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAQMasked256const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VPSRAQMasked512const",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPSRAQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
 
        {
                name:        "ADD",
index f0b25d3c5d125f4689188f56a9c9ead36fe98d56..2e564b0c30771870df204a29a48fd7ef21a9ac76 100644 (file)
@@ -4451,32 +4451,23 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPSHLDQ512
                return true
        case OpShiftAllLeftInt16x16:
-               v.Op = OpAMD64VPSLLW256
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt16x16(v)
        case OpShiftAllLeftInt16x32:
-               v.Op = OpAMD64VPSLLW512
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt16x32(v)
        case OpShiftAllLeftInt16x8:
-               v.Op = OpAMD64VPSLLW128
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt16x8(v)
        case OpShiftAllLeftInt32x16:
-               v.Op = OpAMD64VPSLLD512
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt32x16(v)
        case OpShiftAllLeftInt32x4:
-               v.Op = OpAMD64VPSLLD128
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt32x4(v)
        case OpShiftAllLeftInt32x8:
-               v.Op = OpAMD64VPSLLD256
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt32x8(v)
        case OpShiftAllLeftInt64x2:
-               v.Op = OpAMD64VPSLLQ128
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt64x2(v)
        case OpShiftAllLeftInt64x4:
-               v.Op = OpAMD64VPSLLQ256
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt64x4(v)
        case OpShiftAllLeftInt64x8:
-               v.Op = OpAMD64VPSLLQ512
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftInt64x8(v)
        case OpShiftAllLeftMaskedInt16x16:
                return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v)
        case OpShiftAllLeftMaskedInt16x32:
@@ -4514,32 +4505,23 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllLeftMaskedUint64x8:
                return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v)
        case OpShiftAllLeftUint16x16:
-               v.Op = OpAMD64VPSLLW256
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint16x16(v)
        case OpShiftAllLeftUint16x32:
-               v.Op = OpAMD64VPSLLW512
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint16x32(v)
        case OpShiftAllLeftUint16x8:
-               v.Op = OpAMD64VPSLLW128
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint16x8(v)
        case OpShiftAllLeftUint32x16:
-               v.Op = OpAMD64VPSLLD512
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint32x16(v)
        case OpShiftAllLeftUint32x4:
-               v.Op = OpAMD64VPSLLD128
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint32x4(v)
        case OpShiftAllLeftUint32x8:
-               v.Op = OpAMD64VPSLLD256
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint32x8(v)
        case OpShiftAllLeftUint64x2:
-               v.Op = OpAMD64VPSLLQ128
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint64x2(v)
        case OpShiftAllLeftUint64x4:
-               v.Op = OpAMD64VPSLLQ256
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint64x4(v)
        case OpShiftAllLeftUint64x8:
-               v.Op = OpAMD64VPSLLQ512
-               return true
+               return rewriteValueAMD64_OpShiftAllLeftUint64x8(v)
        case OpShiftAllRightConcatInt16x16:
                v.Op = OpAMD64VPSHRDW256
                return true
@@ -4631,32 +4613,23 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPSHRDQ512
                return true
        case OpShiftAllRightInt16x16:
-               v.Op = OpAMD64VPSRAW256
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt16x16(v)
        case OpShiftAllRightInt16x32:
-               v.Op = OpAMD64VPSRAW512
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt16x32(v)
        case OpShiftAllRightInt16x8:
-               v.Op = OpAMD64VPSRAW128
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt16x8(v)
        case OpShiftAllRightInt32x16:
-               v.Op = OpAMD64VPSRAD512
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt32x16(v)
        case OpShiftAllRightInt32x4:
-               v.Op = OpAMD64VPSRAD128
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt32x4(v)
        case OpShiftAllRightInt32x8:
-               v.Op = OpAMD64VPSRAD256
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt32x8(v)
        case OpShiftAllRightInt64x2:
-               v.Op = OpAMD64VPSRAQ128
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt64x2(v)
        case OpShiftAllRightInt64x4:
-               v.Op = OpAMD64VPSRAQ256
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt64x4(v)
        case OpShiftAllRightInt64x8:
-               v.Op = OpAMD64VPSRAQ512
-               return true
+               return rewriteValueAMD64_OpShiftAllRightInt64x8(v)
        case OpShiftAllRightMaskedInt16x16:
                return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v)
        case OpShiftAllRightMaskedInt16x32:
@@ -4694,32 +4667,23 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllRightMaskedUint64x8:
                return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v)
        case OpShiftAllRightUint16x16:
-               v.Op = OpAMD64VPSRLW256
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint16x16(v)
        case OpShiftAllRightUint16x32:
-               v.Op = OpAMD64VPSRLW512
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint16x32(v)
        case OpShiftAllRightUint16x8:
-               v.Op = OpAMD64VPSRLW128
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint16x8(v)
        case OpShiftAllRightUint32x16:
-               v.Op = OpAMD64VPSRLD512
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint32x16(v)
        case OpShiftAllRightUint32x4:
-               v.Op = OpAMD64VPSRLD128
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint32x4(v)
        case OpShiftAllRightUint32x8:
-               v.Op = OpAMD64VPSRLD256
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint32x8(v)
        case OpShiftAllRightUint64x2:
-               v.Op = OpAMD64VPSRLQ128
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint64x2(v)
        case OpShiftAllRightUint64x4:
-               v.Op = OpAMD64VPSRLQ256
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint64x4(v)
        case OpShiftAllRightUint64x8:
-               v.Op = OpAMD64VPSRLQ512
-               return true
+               return rewriteValueAMD64_OpShiftAllRightUint64x8(v)
        case OpShiftLeftConcatInt16x16:
                v.Op = OpAMD64VPSHLDVW256
                return true
@@ -50791,11 +50755,261 @@ func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpShiftAllLeftInt16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt16x16 x (MOVQconst [c]))
+       // result: (VPSLLW256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt16x16 x y)
+       // result: (VPSLLW256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLW256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftInt16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt16x32 x (MOVQconst [c]))
+       // result: (VPSLLW512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt16x32 x y)
+       // result: (VPSLLW512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLW512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftInt16x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt16x8 x (MOVQconst [c]))
+       // result: (VPSLLW128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt16x8 x y)
+       // result: (VPSLLW128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLW128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftInt32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt32x16 x (MOVQconst [c]))
+       // result: (VPSLLD512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt32x16 x y)
+       // result: (VPSLLD512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLD512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftInt32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt32x4 x (MOVQconst [c]))
+       // result: (VPSLLD128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt32x4 x y)
+       // result: (VPSLLD128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLD128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftInt32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt32x8 x (MOVQconst [c]))
+       // result: (VPSLLD256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt32x8 x y)
+       // result: (VPSLLD256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLD256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftInt64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt64x2 x (MOVQconst [c]))
+       // result: (VPSLLQ128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt64x2 x y)
+       // result: (VPSLLQ128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLQ128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftInt64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt64x4 x (MOVQconst [c]))
+       // result: (VPSLLQ256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt64x4 x y)
+       // result: (VPSLLQ256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLQ256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftInt64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftInt64x8 x (MOVQconst [c]))
+       // result: (VPSLLQ512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftInt64x8 x y)
+       // result: (VPSLLQ512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLQ512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
 func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt16x16 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked256const [int8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt16x16 x y mask)
        // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
@@ -50814,6 +51028,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt16x32 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked512const [int8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt16x32 x y mask)
        // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
@@ -50832,6 +51062,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt16x8 x y mask)
        // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
@@ -50850,6 +51096,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt32x16 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked512const [int8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt32x16 x y mask)
        // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
@@ -50868,6 +51130,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt32x4 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked128const [int8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt32x4 x y mask)
        // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
@@ -50886,6 +51164,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt32x8 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked256const [int8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt32x8 x y mask)
        // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
@@ -50904,6 +51198,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt64x2 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked128const [int8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt64x2 x y mask)
        // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
@@ -50922,6 +51232,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt64x4 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked256const [int8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt64x4 x y mask)
        // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
@@ -50940,6 +51266,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedInt64x8 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked512const [int8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedInt64x8 x y mask)
        // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
@@ -50958,6 +51300,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint16x16 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked256const [int8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint16x16 x y mask)
        // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
@@ -50976,6 +51334,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint16x32 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked512const [int8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint16x32 x y mask)
        // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
@@ -50994,6 +51368,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint16x8 x (MOVQconst [c]) mask)
+       // result: (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLWMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint16x8 x y mask)
        // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
@@ -51012,6 +51402,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint32x16 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked512const [int8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint32x16 x y mask)
        // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
@@ -51030,6 +51436,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint32x4 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked128const [int8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint32x4 x y mask)
        // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
@@ -51048,6 +51470,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint32x8 x (MOVQconst [c]) mask)
+       // result: (VPSLLDMasked256const [int8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLDMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint32x8 x y mask)
        // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
@@ -51066,6 +51504,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint64x2 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked128const [int8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint64x2 x y mask)
        // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
@@ -51084,6 +51538,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint64x4 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked256const [int8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint64x4 x y mask)
        // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
@@ -51102,6 +51572,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllLeftMaskedUint64x8 x (MOVQconst [c]) mask)
+       // result: (VPSLLQMasked512const [int8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSLLQMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllLeftMaskedUint64x8 x y mask)
        // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
@@ -51115,68 +51601,302 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpShiftAllLeftUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask)
-       // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftUint16x16 x (MOVQconst [c]))
+       // result: (VPSLLW256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint16x16 x y)
+       // result: (VPSLLW256 x y)
        for {
-               a := auxIntToInt8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked256)
-               v.AuxInt = int8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPSLLW256)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpShiftAllLeftUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask)
-       // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftUint16x32 x (MOVQconst [c]))
+       // result: (VPSLLW512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint16x32 x y)
+       // result: (VPSLLW512 x y)
        for {
-               a := auxIntToInt8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked512)
-               v.AuxInt = int8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPSLLW512)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpShiftAllLeftUint16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask)
-       // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (ShiftAllLeftUint16x8 x (MOVQconst [c]))
+       // result: (VPSLLW128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLW128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint16x8 x y)
+       // result: (VPSLLW128 x y)
        for {
-               a := auxIntToInt8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked128)
-               v.AuxInt = int8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPSLLW128)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpShiftAllLeftUint32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftUint32x16 x (MOVQconst [c]))
+       // result: (VPSLLD512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint32x16 x y)
+       // result: (VPSLLD512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLD512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftUint32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftUint32x4 x (MOVQconst [c]))
+       // result: (VPSLLD128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint32x4 x y)
+       // result: (VPSLLD128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLD128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftUint32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftUint32x8 x (MOVQconst [c]))
+       // result: (VPSLLD256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLD256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint32x8 x y)
+       // result: (VPSLLD256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLD256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftUint64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftUint64x2 x (MOVQconst [c]))
+       // result: (VPSLLQ128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint64x2 x y)
+       // result: (VPSLLQ128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLQ128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftUint64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftUint64x4 x (MOVQconst [c]))
+       // result: (VPSLLQ256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint64x4 x y)
+       // result: (VPSLLQ256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLQ256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllLeftUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllLeftUint64x8 x (MOVQconst [c]))
+       // result: (VPSLLQ512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSLLQ512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllLeftUint64x8 x y)
+       // result: (VPSLLQ512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSLLQ512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask)
+       // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               mask := v_2
+               v.reset(OpAMD64VPSHRDWMasked256)
+               v.AuxInt = int8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask)
+       // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               mask := v_2
+               v.reset(OpAMD64VPSHRDWMasked512)
+               v.AuxInt = int8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask)
+       // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               mask := v_2
+               v.reset(OpAMD64VPSHRDWMasked128)
+               v.AuxInt = int8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(x, y, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
@@ -51475,11 +52195,261 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpShiftAllRightInt16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt16x16 x (MOVQconst [c]))
+       // result: (VPSRAW256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAW256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt16x16 x y)
+       // result: (VPSRAW256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAW256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightInt16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt16x32 x (MOVQconst [c]))
+       // result: (VPSRAW512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAW512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt16x32 x y)
+       // result: (VPSRAW512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAW512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightInt16x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt16x8 x (MOVQconst [c]))
+       // result: (VPSRAW128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAW128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt16x8 x y)
+       // result: (VPSRAW128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAW128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightInt32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt32x16 x (MOVQconst [c]))
+       // result: (VPSRAD512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAD512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt32x16 x y)
+       // result: (VPSRAD512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAD512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightInt32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt32x4 x (MOVQconst [c]))
+       // result: (VPSRAD128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAD128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt32x4 x y)
+       // result: (VPSRAD128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAD128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightInt32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt32x8 x (MOVQconst [c]))
+       // result: (VPSRAD256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAD256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt32x8 x y)
+       // result: (VPSRAD256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAD256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightInt64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt64x2 x (MOVQconst [c]))
+       // result: (VPSRAQ128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAQ128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt64x2 x y)
+       // result: (VPSRAQ128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAQ128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightInt64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt64x4 x (MOVQconst [c]))
+       // result: (VPSRAQ256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAQ256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt64x4 x y)
+       // result: (VPSRAQ256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAQ256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightInt64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightInt64x8 x (MOVQconst [c]))
+       // result: (VPSRAQ512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRAQ512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightInt64x8 x y)
+       // result: (VPSRAQ512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRAQ512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
 func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt16x16 x (MOVQconst [c]) mask)
+       // result: (VPSRAWMasked256const [int8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAWMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt16x16 x y mask)
        // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
@@ -51498,6 +52468,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt16x32 x (MOVQconst [c]) mask)
+       // result: (VPSRAWMasked512const [int8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAWMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt16x32 x y mask)
        // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
@@ -51516,6 +52502,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt16x8 x (MOVQconst [c]) mask)
+       // result: (VPSRAWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAWMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt16x8 x y mask)
        // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
@@ -51534,6 +52536,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt32x16 x (MOVQconst [c]) mask)
+       // result: (VPSRADMasked512const [int8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRADMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt32x16 x y mask)
        // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
@@ -51552,6 +52570,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt32x4 x (MOVQconst [c]) mask)
+       // result: (VPSRADMasked128const [int8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRADMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt32x4 x y mask)
        // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
@@ -51570,6 +52604,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt32x8 x (MOVQconst [c]) mask)
+       // result: (VPSRADMasked256const [int8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRADMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt32x8 x y mask)
        // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
@@ -51588,6 +52638,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt64x2 x (MOVQconst [c]) mask)
+       // result: (VPSRAQMasked128const [int8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAQMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt64x2 x y mask)
        // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
@@ -51606,6 +52672,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt64x4 x (MOVQconst [c]) mask)
+       // result: (VPSRAQMasked256const [int8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAQMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt64x4 x y mask)
        // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
@@ -51624,6 +52706,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedInt64x8 x (MOVQconst [c]) mask)
+       // result: (VPSRAQMasked512const [int8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRAQMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedInt64x8 x y mask)
        // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
@@ -51642,6 +52740,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint16x16 x (MOVQconst [c]) mask)
+       // result: (VPSRLWMasked256const [int8(c)] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLWMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint16x16 x y mask)
        // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
@@ -51660,6 +52774,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint16x32 x (MOVQconst [c]) mask)
+       // result: (VPSRLWMasked512const [int8(c)] x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLWMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint16x32 x y mask)
        // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
@@ -51678,6 +52808,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint16x8 x (MOVQconst [c]) mask)
+       // result: (VPSRLWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLWMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint16x8 x y mask)
        // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
@@ -51696,6 +52842,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint32x16 x (MOVQconst [c]) mask)
+       // result: (VPSRLDMasked512const [int8(c)] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLDMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint32x16 x y mask)
        // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
@@ -51714,6 +52876,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint32x4 x (MOVQconst [c]) mask)
+       // result: (VPSRLDMasked128const [int8(c)] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLDMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint32x4 x y mask)
        // result: (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
@@ -51732,6 +52910,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint32x8 x (MOVQconst [c]) mask)
+       // result: (VPSRLDMasked256const [int8(c)] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLDMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint32x8 x y mask)
        // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
@@ -51750,6 +52944,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint64x2 x (MOVQconst [c]) mask)
+       // result: (VPSRLQMasked128const [int8(c)] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLQMasked128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint64x2 x y mask)
        // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
@@ -51768,6 +52978,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint64x4 x (MOVQconst [c]) mask)
+       // result: (VPSRLQMasked256const [int8(c)] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLQMasked256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint64x4 x y mask)
        // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
@@ -51786,6 +53012,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
+       // match: (ShiftAllRightMaskedUint64x8 x (MOVQconst [c]) mask)
+       // result: (VPSRLQMasked512const [int8(c)] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mask := v_2
+               v.reset(OpAMD64VPSRLQMasked512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
        // match: (ShiftAllRightMaskedUint64x8 x y mask)
        // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
@@ -51799,6 +53041,240 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpShiftAllRightUint16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint16x16 x (MOVQconst [c]))
+       // result: (VPSRLW256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLW256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint16x16 x y)
+       // result: (VPSRLW256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLW256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint16x32 x (MOVQconst [c]))
+       // result: (VPSRLW512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLW512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint16x32 x y)
+       // result: (VPSRLW512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLW512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightUint16x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint16x8 x (MOVQconst [c]))
+       // result: (VPSRLW128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLW128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint16x8 x y)
+       // result: (VPSRLW128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLW128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightUint32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint32x16 x (MOVQconst [c]))
+       // result: (VPSRLD512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLD512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint32x16 x y)
+       // result: (VPSRLD512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLD512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightUint32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint32x4 x (MOVQconst [c]))
+       // result: (VPSRLD128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLD128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint32x4 x y)
+       // result: (VPSRLD128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLD128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightUint32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint32x8 x (MOVQconst [c]))
+       // result: (VPSRLD256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLD256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint32x8 x y)
+       // result: (VPSRLD256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLD256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightUint64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint64x2 x (MOVQconst [c]))
+       // result: (VPSRLQ128const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLQ128const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint64x2 x y)
+       // result: (VPSRLQ128 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLQ128)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightUint64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint64x4 x (MOVQconst [c]))
+       // result: (VPSRLQ256const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLQ256const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint64x4 x y)
+       // result: (VPSRLQ256 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLQ256)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpShiftAllRightUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ShiftAllRightUint64x8 x (MOVQconst [c]))
+       // result: (VPSRLQ512const [int8(c)] x)
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64MOVQconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               v.reset(OpAMD64VPSRLQ512const)
+               v.AuxInt = int8ToAuxInt(int8(c))
+               v.AddArg(x)
+               return true
+       }
+       // match: (ShiftAllRightUint64x8 x y)
+       // result: (VPSRLQ512 x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPSRLQ512)
+               v.AddArg2(x, y)
+               return true
+       }
+}
 func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool {
        v_3 := v.Args[3]
        v_2 := v.Args[2]
index 2326addea94b8ef2e5cbae0fc0123da4cd7619d0..1df27f875760aa275a78494985d24e23fcc86564 100644 (file)
@@ -206,6 +206,24 @@ func TestPairDotProdAccumulate(t *testing.T) {
        }
 }
 
+var testShiftAllVal uint64 = 3
+
+func TestShiftAll(t *testing.T) {
+       got := make([]int32, 4)
+       simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
+       for _, v := range got {
+               if v != 0b1100 {
+                       t.Errorf("expect 0b1100, got %b", v)
+               }
+       }
+       simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
+       for _, v := range got {
+               if v != 0b11000 {
+                       t.Errorf("expect 0b11000, got %b", v)
+               }
+       }
+}
+
 func TestSlicesInt8(t *testing.T) {
        a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}