Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: fix unstable output
author Junyang Shao <shaojunyang@google.com>
Thu, 13 Nov 2025 17:07:16 +0000 (17:07 +0000)
committer Junyang Shao <shaojunyang@google.com>
Mon, 17 Nov 2025 21:37:22 +0000 (13:37 -0800)
This CL fixes an error left by CL 718160.

Change-Id: I442ea59bc1ff0dda2914d1858dd5ebe93e2818dc
Reviewed-on: https://go-review.googlesource.com/c/go/+/720281
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/simd/_gen/simdgen/gen_simdrules.go
src/simd/_gen/simdgen/godefs.go

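The hunks below all follow one pattern: narrowing VPMOV* opcodes whose destination is always a 128-bit vector previously shared a single op name across several source widths (for example, ConvertToInt8Int32x4, ...x8, and ...x16 all lowered to VPMOVDB128), which appears to be what made the generated output unstable; the fix suffixes the source width onto the name so each source width gets a distinct SSA opcode. A minimal sketch of the naming scheme as it is visible in the diff, assuming a hypothetical helper (opName is an illustration, not the generator's actual function):

    package main

    import "fmt"

    // opName illustrates the renaming visible in this diff: a narrowing
    // op keeps its destination width and, where several source widths
    // would otherwise collide on one name, gains a _<srcBits> suffix.
    func opName(base string, dstBits, srcBits int) string {
        return fmt.Sprintf("%s%d_%d", base, dstBits, srcBits)
    }

    func main() {
        // Before this CL all three conversions shared the name
        // "VPMOVDB128"; after it they are three distinct ops.
        for _, src := range []int{128, 256, 512} {
            fmt.Println(opName("VPMOVDB", 128, src))
        }
        // Output:
        // VPMOVDB128_128
        // VPMOVDB128_256
        // VPMOVDB128_512
    }

Ops whose destination and source widths never collide (e.g. VPMOVWB256, VPMOVQD256) keep their old names, so the diff touches only the 128-bit-destination families.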
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 0abcd95e3715f2f7c6da657f53742479b8360f86..9425b42d41016b30b648f3bbe112428e898e07af 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -42,22 +42,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPBROADCASTW512,
                ssa.OpAMD64VPBROADCASTD512,
                ssa.OpAMD64VPBROADCASTQ512,
-               ssa.OpAMD64VPMOVWB128,
+               ssa.OpAMD64VPMOVWB128_128,
+               ssa.OpAMD64VPMOVWB128_256,
                ssa.OpAMD64VPMOVWB256,
-               ssa.OpAMD64VPMOVDB128,
-               ssa.OpAMD64VPMOVQB128,
-               ssa.OpAMD64VPMOVSWB128,
+               ssa.OpAMD64VPMOVDB128_128,
+               ssa.OpAMD64VPMOVDB128_256,
+               ssa.OpAMD64VPMOVDB128_512,
+               ssa.OpAMD64VPMOVQB128_128,
+               ssa.OpAMD64VPMOVQB128_256,
+               ssa.OpAMD64VPMOVQB128_512,
+               ssa.OpAMD64VPMOVSWB128_128,
+               ssa.OpAMD64VPMOVSWB128_256,
                ssa.OpAMD64VPMOVSWB256,
-               ssa.OpAMD64VPMOVSDB128,
-               ssa.OpAMD64VPMOVSQB128,
+               ssa.OpAMD64VPMOVSDB128_128,
+               ssa.OpAMD64VPMOVSDB128_256,
+               ssa.OpAMD64VPMOVSDB128_512,
+               ssa.OpAMD64VPMOVSQB128_128,
+               ssa.OpAMD64VPMOVSQB128_256,
+               ssa.OpAMD64VPMOVSQB128_512,
                ssa.OpAMD64VPMOVSXBW256,
                ssa.OpAMD64VPMOVSXBW512,
-               ssa.OpAMD64VPMOVDW128,
+               ssa.OpAMD64VPMOVDW128_128,
+               ssa.OpAMD64VPMOVDW128_256,
                ssa.OpAMD64VPMOVDW256,
-               ssa.OpAMD64VPMOVQW128,
-               ssa.OpAMD64VPMOVSDW128,
+               ssa.OpAMD64VPMOVQW128_128,
+               ssa.OpAMD64VPMOVQW128_256,
+               ssa.OpAMD64VPMOVQW128_512,
+               ssa.OpAMD64VPMOVSDW128_128,
+               ssa.OpAMD64VPMOVSDW128_256,
                ssa.OpAMD64VPMOVSDW256,
-               ssa.OpAMD64VPMOVSQW128,
+               ssa.OpAMD64VPMOVSQW128_128,
+               ssa.OpAMD64VPMOVSQW128_256,
+               ssa.OpAMD64VPMOVSQW128_512,
                ssa.OpAMD64VPMOVSXBW128,
                ssa.OpAMD64VCVTTPS2DQ128,
                ssa.OpAMD64VCVTTPS2DQ256,
@@ -65,9 +81,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSXBD512,
                ssa.OpAMD64VPMOVSXWD256,
                ssa.OpAMD64VPMOVSXWD512,
-               ssa.OpAMD64VPMOVQD128,
+               ssa.OpAMD64VPMOVQD128_128,
+               ssa.OpAMD64VPMOVQD128_256,
                ssa.OpAMD64VPMOVQD256,
-               ssa.OpAMD64VPMOVSQD128,
+               ssa.OpAMD64VPMOVSQD128_128,
+               ssa.OpAMD64VPMOVSQD128_256,
                ssa.OpAMD64VPMOVSQD256,
                ssa.OpAMD64VPMOVSXBD128,
                ssa.OpAMD64VPMOVSXWD128,
@@ -80,15 +98,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSXDQ128,
                ssa.OpAMD64VPMOVSXBQ256,
                ssa.OpAMD64VPMOVSXBQ512,
-               ssa.OpAMD64VPMOVUSWB128,
+               ssa.OpAMD64VPMOVUSWB128_128,
+               ssa.OpAMD64VPMOVUSWB128_256,
                ssa.OpAMD64VPMOVUSWB256,
-               ssa.OpAMD64VPMOVUSDB128,
-               ssa.OpAMD64VPMOVUSQB128,
+               ssa.OpAMD64VPMOVUSDB128_128,
+               ssa.OpAMD64VPMOVUSDB128_256,
+               ssa.OpAMD64VPMOVUSDB128_512,
+               ssa.OpAMD64VPMOVUSQB128_128,
+               ssa.OpAMD64VPMOVUSQB128_256,
+               ssa.OpAMD64VPMOVUSQB128_512,
                ssa.OpAMD64VPMOVZXBW256,
                ssa.OpAMD64VPMOVZXBW512,
-               ssa.OpAMD64VPMOVUSDW128,
+               ssa.OpAMD64VPMOVUSDW128_128,
+               ssa.OpAMD64VPMOVUSDW128_256,
                ssa.OpAMD64VPMOVUSDW256,
-               ssa.OpAMD64VPMOVUSQW128,
+               ssa.OpAMD64VPMOVUSQW128_128,
+               ssa.OpAMD64VPMOVUSQW128_256,
+               ssa.OpAMD64VPMOVUSQW128_512,
                ssa.OpAMD64VPMOVZXBW128,
                ssa.OpAMD64VCVTPS2UDQ128,
                ssa.OpAMD64VCVTPS2UDQ256,
@@ -96,7 +122,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVZXBD512,
                ssa.OpAMD64VPMOVZXWD256,
                ssa.OpAMD64VPMOVZXWD512,
-               ssa.OpAMD64VPMOVUSQD128,
+               ssa.OpAMD64VPMOVUSQD128_128,
+               ssa.OpAMD64VPMOVUSQD128_256,
                ssa.OpAMD64VPMOVUSQD256,
                ssa.OpAMD64VPMOVZXBD128,
                ssa.OpAMD64VPMOVZXWD128,
@@ -791,22 +818,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPCOMPRESSQMasked128,
                ssa.OpAMD64VPCOMPRESSQMasked256,
                ssa.OpAMD64VPCOMPRESSQMasked512,
-               ssa.OpAMD64VPMOVWBMasked128,
+               ssa.OpAMD64VPMOVWBMasked128_128,
+               ssa.OpAMD64VPMOVWBMasked128_256,
                ssa.OpAMD64VPMOVWBMasked256,
-               ssa.OpAMD64VPMOVDBMasked128,
-               ssa.OpAMD64VPMOVQBMasked128,
-               ssa.OpAMD64VPMOVSWBMasked128,
+               ssa.OpAMD64VPMOVDBMasked128_128,
+               ssa.OpAMD64VPMOVDBMasked128_256,
+               ssa.OpAMD64VPMOVDBMasked128_512,
+               ssa.OpAMD64VPMOVQBMasked128_128,
+               ssa.OpAMD64VPMOVQBMasked128_256,
+               ssa.OpAMD64VPMOVQBMasked128_512,
+               ssa.OpAMD64VPMOVSWBMasked128_128,
+               ssa.OpAMD64VPMOVSWBMasked128_256,
                ssa.OpAMD64VPMOVSWBMasked256,
-               ssa.OpAMD64VPMOVSDBMasked128,
-               ssa.OpAMD64VPMOVSQBMasked128,
+               ssa.OpAMD64VPMOVSDBMasked128_128,
+               ssa.OpAMD64VPMOVSDBMasked128_256,
+               ssa.OpAMD64VPMOVSDBMasked128_512,
+               ssa.OpAMD64VPMOVSQBMasked128_128,
+               ssa.OpAMD64VPMOVSQBMasked128_256,
+               ssa.OpAMD64VPMOVSQBMasked128_512,
                ssa.OpAMD64VPMOVSXBWMasked256,
                ssa.OpAMD64VPMOVSXBWMasked512,
-               ssa.OpAMD64VPMOVDWMasked128,
+               ssa.OpAMD64VPMOVDWMasked128_128,
+               ssa.OpAMD64VPMOVDWMasked128_256,
                ssa.OpAMD64VPMOVDWMasked256,
-               ssa.OpAMD64VPMOVQWMasked128,
-               ssa.OpAMD64VPMOVSDWMasked128,
+               ssa.OpAMD64VPMOVQWMasked128_128,
+               ssa.OpAMD64VPMOVQWMasked128_256,
+               ssa.OpAMD64VPMOVQWMasked128_512,
+               ssa.OpAMD64VPMOVSDWMasked128_128,
+               ssa.OpAMD64VPMOVSDWMasked128_256,
                ssa.OpAMD64VPMOVSDWMasked256,
-               ssa.OpAMD64VPMOVSQWMasked128,
+               ssa.OpAMD64VPMOVSQWMasked128_128,
+               ssa.OpAMD64VPMOVSQWMasked128_256,
+               ssa.OpAMD64VPMOVSQWMasked128_512,
                ssa.OpAMD64VPMOVSXBWMasked128,
                ssa.OpAMD64VCVTTPS2DQMasked128,
                ssa.OpAMD64VCVTTPS2DQMasked256,
@@ -814,9 +857,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSXBDMasked512,
                ssa.OpAMD64VPMOVSXWDMasked256,
                ssa.OpAMD64VPMOVSXWDMasked512,
-               ssa.OpAMD64VPMOVQDMasked128,
+               ssa.OpAMD64VPMOVQDMasked128_128,
+               ssa.OpAMD64VPMOVQDMasked128_256,
                ssa.OpAMD64VPMOVQDMasked256,
-               ssa.OpAMD64VPMOVSQDMasked128,
+               ssa.OpAMD64VPMOVSQDMasked128_128,
+               ssa.OpAMD64VPMOVSQDMasked128_256,
                ssa.OpAMD64VPMOVSQDMasked256,
                ssa.OpAMD64VPMOVSXBDMasked128,
                ssa.OpAMD64VPMOVSXWDMasked128,
@@ -829,15 +874,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSXDQMasked128,
                ssa.OpAMD64VPMOVSXBQMasked256,
                ssa.OpAMD64VPMOVSXBQMasked512,
-               ssa.OpAMD64VPMOVUSWBMasked128,
+               ssa.OpAMD64VPMOVUSWBMasked128_128,
+               ssa.OpAMD64VPMOVUSWBMasked128_256,
                ssa.OpAMD64VPMOVUSWBMasked256,
-               ssa.OpAMD64VPMOVUSDBMasked128,
-               ssa.OpAMD64VPMOVUSQBMasked128,
+               ssa.OpAMD64VPMOVUSDBMasked128_128,
+               ssa.OpAMD64VPMOVUSDBMasked128_256,
+               ssa.OpAMD64VPMOVUSDBMasked128_512,
+               ssa.OpAMD64VPMOVUSQBMasked128_128,
+               ssa.OpAMD64VPMOVUSQBMasked128_256,
+               ssa.OpAMD64VPMOVUSQBMasked128_512,
                ssa.OpAMD64VPMOVZXBWMasked256,
                ssa.OpAMD64VPMOVZXBWMasked512,
-               ssa.OpAMD64VPMOVUSDWMasked128,
+               ssa.OpAMD64VPMOVUSDWMasked128_128,
+               ssa.OpAMD64VPMOVUSDWMasked128_256,
                ssa.OpAMD64VPMOVUSDWMasked256,
-               ssa.OpAMD64VPMOVUSQWMasked128,
+               ssa.OpAMD64VPMOVUSQWMasked128_128,
+               ssa.OpAMD64VPMOVUSQWMasked128_256,
+               ssa.OpAMD64VPMOVUSQWMasked128_512,
                ssa.OpAMD64VPMOVZXBWMasked128,
                ssa.OpAMD64VCVTPS2UDQMasked128,
                ssa.OpAMD64VCVTPS2UDQMasked256,
@@ -845,7 +898,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVZXBDMasked512,
                ssa.OpAMD64VPMOVZXWDMasked256,
                ssa.OpAMD64VPMOVZXWDMasked512,
-               ssa.OpAMD64VPMOVUSQDMasked128,
+               ssa.OpAMD64VPMOVUSQDMasked128_128,
+               ssa.OpAMD64VPMOVUSQDMasked128_256,
                ssa.OpAMD64VPMOVUSQDMasked256,
                ssa.OpAMD64VPMOVZXBDMasked128,
                ssa.OpAMD64VPMOVZXWDMasked128,
@@ -2266,22 +2320,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VREDUCEPDMasked128Merging,
                ssa.OpAMD64VREDUCEPDMasked256Merging,
                ssa.OpAMD64VREDUCEPDMasked512Merging,
-               ssa.OpAMD64VPMOVWBMasked128Merging,
+               ssa.OpAMD64VPMOVWBMasked128_128Merging,
+               ssa.OpAMD64VPMOVWBMasked128_256Merging,
                ssa.OpAMD64VPMOVWBMasked256Merging,
-               ssa.OpAMD64VPMOVDBMasked128Merging,
-               ssa.OpAMD64VPMOVQBMasked128Merging,
-               ssa.OpAMD64VPMOVSWBMasked128Merging,
+               ssa.OpAMD64VPMOVDBMasked128_128Merging,
+               ssa.OpAMD64VPMOVDBMasked128_256Merging,
+               ssa.OpAMD64VPMOVDBMasked128_512Merging,
+               ssa.OpAMD64VPMOVQBMasked128_128Merging,
+               ssa.OpAMD64VPMOVQBMasked128_256Merging,
+               ssa.OpAMD64VPMOVQBMasked128_512Merging,
+               ssa.OpAMD64VPMOVSWBMasked128_128Merging,
+               ssa.OpAMD64VPMOVSWBMasked128_256Merging,
                ssa.OpAMD64VPMOVSWBMasked256Merging,
-               ssa.OpAMD64VPMOVSDBMasked128Merging,
-               ssa.OpAMD64VPMOVSQBMasked128Merging,
+               ssa.OpAMD64VPMOVSDBMasked128_128Merging,
+               ssa.OpAMD64VPMOVSDBMasked128_256Merging,
+               ssa.OpAMD64VPMOVSDBMasked128_512Merging,
+               ssa.OpAMD64VPMOVSQBMasked128_128Merging,
+               ssa.OpAMD64VPMOVSQBMasked128_256Merging,
+               ssa.OpAMD64VPMOVSQBMasked128_512Merging,
                ssa.OpAMD64VPMOVSXBWMasked256Merging,
                ssa.OpAMD64VPMOVSXBWMasked512Merging,
-               ssa.OpAMD64VPMOVDWMasked128Merging,
+               ssa.OpAMD64VPMOVDWMasked128_128Merging,
+               ssa.OpAMD64VPMOVDWMasked128_256Merging,
                ssa.OpAMD64VPMOVDWMasked256Merging,
-               ssa.OpAMD64VPMOVQWMasked128Merging,
-               ssa.OpAMD64VPMOVSDWMasked128Merging,
+               ssa.OpAMD64VPMOVQWMasked128_128Merging,
+               ssa.OpAMD64VPMOVQWMasked128_256Merging,
+               ssa.OpAMD64VPMOVQWMasked128_512Merging,
+               ssa.OpAMD64VPMOVSDWMasked128_128Merging,
+               ssa.OpAMD64VPMOVSDWMasked128_256Merging,
                ssa.OpAMD64VPMOVSDWMasked256Merging,
-               ssa.OpAMD64VPMOVSQWMasked128Merging,
+               ssa.OpAMD64VPMOVSQWMasked128_128Merging,
+               ssa.OpAMD64VPMOVSQWMasked128_256Merging,
+               ssa.OpAMD64VPMOVSQWMasked128_512Merging,
                ssa.OpAMD64VPMOVSXBWMasked128Merging,
                ssa.OpAMD64VCVTTPS2DQMasked128Merging,
                ssa.OpAMD64VCVTTPS2DQMasked256Merging,
@@ -2289,9 +2359,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSXBDMasked512Merging,
                ssa.OpAMD64VPMOVSXWDMasked256Merging,
                ssa.OpAMD64VPMOVSXWDMasked512Merging,
-               ssa.OpAMD64VPMOVQDMasked128Merging,
+               ssa.OpAMD64VPMOVQDMasked128_128Merging,
+               ssa.OpAMD64VPMOVQDMasked128_256Merging,
                ssa.OpAMD64VPMOVQDMasked256Merging,
-               ssa.OpAMD64VPMOVSQDMasked128Merging,
+               ssa.OpAMD64VPMOVSQDMasked128_128Merging,
+               ssa.OpAMD64VPMOVSQDMasked128_256Merging,
                ssa.OpAMD64VPMOVSQDMasked256Merging,
                ssa.OpAMD64VPMOVSXBDMasked128Merging,
                ssa.OpAMD64VPMOVSXWDMasked128Merging,
@@ -2304,15 +2376,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSXDQMasked128Merging,
                ssa.OpAMD64VPMOVSXBQMasked256Merging,
                ssa.OpAMD64VPMOVSXBQMasked512Merging,
-               ssa.OpAMD64VPMOVUSWBMasked128Merging,
+               ssa.OpAMD64VPMOVUSWBMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSWBMasked128_256Merging,
                ssa.OpAMD64VPMOVUSWBMasked256Merging,
-               ssa.OpAMD64VPMOVUSDBMasked128Merging,
-               ssa.OpAMD64VPMOVUSQBMasked128Merging,
+               ssa.OpAMD64VPMOVUSDBMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSDBMasked128_256Merging,
+               ssa.OpAMD64VPMOVUSDBMasked128_512Merging,
+               ssa.OpAMD64VPMOVUSQBMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSQBMasked128_256Merging,
+               ssa.OpAMD64VPMOVUSQBMasked128_512Merging,
                ssa.OpAMD64VPMOVZXBWMasked256Merging,
                ssa.OpAMD64VPMOVZXBWMasked512Merging,
-               ssa.OpAMD64VPMOVUSDWMasked128Merging,
+               ssa.OpAMD64VPMOVUSDWMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSDWMasked128_256Merging,
                ssa.OpAMD64VPMOVUSDWMasked256Merging,
-               ssa.OpAMD64VPMOVUSQWMasked128Merging,
+               ssa.OpAMD64VPMOVUSQWMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSQWMasked128_256Merging,
+               ssa.OpAMD64VPMOVUSQWMasked128_512Merging,
                ssa.OpAMD64VPMOVZXBWMasked128Merging,
                ssa.OpAMD64VCVTPS2UDQMasked128Merging,
                ssa.OpAMD64VCVTPS2UDQMasked256Merging,
@@ -2320,7 +2400,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVZXBDMasked512Merging,
                ssa.OpAMD64VPMOVZXWDMasked256Merging,
                ssa.OpAMD64VPMOVZXWDMasked512Merging,
-               ssa.OpAMD64VPMOVUSQDMasked128Merging,
+               ssa.OpAMD64VPMOVUSQDMasked128_128Merging,
+               ssa.OpAMD64VPMOVUSQDMasked128_256Merging,
                ssa.OpAMD64VPMOVUSQDMasked256Merging,
                ssa.OpAMD64VPMOVZXBDMasked128Merging,
                ssa.OpAMD64VPMOVZXWDMasked128Merging,
@@ -2592,22 +2673,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPCOMPRESSQMasked128,
                ssa.OpAMD64VPCOMPRESSQMasked256,
                ssa.OpAMD64VPCOMPRESSQMasked512,
-               ssa.OpAMD64VPMOVWBMasked128,
+               ssa.OpAMD64VPMOVWBMasked128_128,
+               ssa.OpAMD64VPMOVWBMasked128_256,
                ssa.OpAMD64VPMOVWBMasked256,
-               ssa.OpAMD64VPMOVDBMasked128,
-               ssa.OpAMD64VPMOVQBMasked128,
-               ssa.OpAMD64VPMOVSWBMasked128,
+               ssa.OpAMD64VPMOVDBMasked128_128,
+               ssa.OpAMD64VPMOVDBMasked128_256,
+               ssa.OpAMD64VPMOVDBMasked128_512,
+               ssa.OpAMD64VPMOVQBMasked128_128,
+               ssa.OpAMD64VPMOVQBMasked128_256,
+               ssa.OpAMD64VPMOVQBMasked128_512,
+               ssa.OpAMD64VPMOVSWBMasked128_128,
+               ssa.OpAMD64VPMOVSWBMasked128_256,
                ssa.OpAMD64VPMOVSWBMasked256,
-               ssa.OpAMD64VPMOVSDBMasked128,
-               ssa.OpAMD64VPMOVSQBMasked128,
+               ssa.OpAMD64VPMOVSDBMasked128_128,
+               ssa.OpAMD64VPMOVSDBMasked128_256,
+               ssa.OpAMD64VPMOVSDBMasked128_512,
+               ssa.OpAMD64VPMOVSQBMasked128_128,
+               ssa.OpAMD64VPMOVSQBMasked128_256,
+               ssa.OpAMD64VPMOVSQBMasked128_512,
                ssa.OpAMD64VPMOVSXBWMasked256,
                ssa.OpAMD64VPMOVSXBWMasked512,
-               ssa.OpAMD64VPMOVDWMasked128,
+               ssa.OpAMD64VPMOVDWMasked128_128,
+               ssa.OpAMD64VPMOVDWMasked128_256,
                ssa.OpAMD64VPMOVDWMasked256,
-               ssa.OpAMD64VPMOVQWMasked128,
-               ssa.OpAMD64VPMOVSDWMasked128,
+               ssa.OpAMD64VPMOVQWMasked128_128,
+               ssa.OpAMD64VPMOVQWMasked128_256,
+               ssa.OpAMD64VPMOVQWMasked128_512,
+               ssa.OpAMD64VPMOVSDWMasked128_128,
+               ssa.OpAMD64VPMOVSDWMasked128_256,
                ssa.OpAMD64VPMOVSDWMasked256,
-               ssa.OpAMD64VPMOVSQWMasked128,
+               ssa.OpAMD64VPMOVSQWMasked128_128,
+               ssa.OpAMD64VPMOVSQWMasked128_256,
+               ssa.OpAMD64VPMOVSQWMasked128_512,
                ssa.OpAMD64VPACKSSDWMasked128,
                ssa.OpAMD64VPACKSSDWMasked128load,
                ssa.OpAMD64VPACKSSDWMasked256,
@@ -2624,9 +2721,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSXBDMasked512,
                ssa.OpAMD64VPMOVSXWDMasked256,
                ssa.OpAMD64VPMOVSXWDMasked512,
-               ssa.OpAMD64VPMOVQDMasked128,
+               ssa.OpAMD64VPMOVQDMasked128_128,
+               ssa.OpAMD64VPMOVQDMasked128_256,
                ssa.OpAMD64VPMOVQDMasked256,
-               ssa.OpAMD64VPMOVSQDMasked128,
+               ssa.OpAMD64VPMOVSQDMasked128_128,
+               ssa.OpAMD64VPMOVSQDMasked128_256,
                ssa.OpAMD64VPMOVSQDMasked256,
                ssa.OpAMD64VPMOVSXBDMasked128,
                ssa.OpAMD64VPMOVSXWDMasked128,
@@ -2639,15 +2738,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVSXDQMasked128,
                ssa.OpAMD64VPMOVSXBQMasked256,
                ssa.OpAMD64VPMOVSXBQMasked512,
-               ssa.OpAMD64VPMOVUSWBMasked128,
+               ssa.OpAMD64VPMOVUSWBMasked128_128,
+               ssa.OpAMD64VPMOVUSWBMasked128_256,
                ssa.OpAMD64VPMOVUSWBMasked256,
-               ssa.OpAMD64VPMOVUSDBMasked128,
-               ssa.OpAMD64VPMOVUSQBMasked128,
+               ssa.OpAMD64VPMOVUSDBMasked128_128,
+               ssa.OpAMD64VPMOVUSDBMasked128_256,
+               ssa.OpAMD64VPMOVUSDBMasked128_512,
+               ssa.OpAMD64VPMOVUSQBMasked128_128,
+               ssa.OpAMD64VPMOVUSQBMasked128_256,
+               ssa.OpAMD64VPMOVUSQBMasked128_512,
                ssa.OpAMD64VPMOVZXBWMasked256,
                ssa.OpAMD64VPMOVZXBWMasked512,
-               ssa.OpAMD64VPMOVUSDWMasked128,
+               ssa.OpAMD64VPMOVUSDWMasked128_128,
+               ssa.OpAMD64VPMOVUSDWMasked128_256,
                ssa.OpAMD64VPMOVUSDWMasked256,
-               ssa.OpAMD64VPMOVUSQWMasked128,
+               ssa.OpAMD64VPMOVUSQWMasked128_128,
+               ssa.OpAMD64VPMOVUSQWMasked128_256,
+               ssa.OpAMD64VPMOVUSQWMasked128_512,
                ssa.OpAMD64VPACKUSDWMasked128,
                ssa.OpAMD64VPACKUSDWMasked128load,
                ssa.OpAMD64VPACKUSDWMasked256,
@@ -2664,7 +2771,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPMOVZXBDMasked512,
                ssa.OpAMD64VPMOVZXWDMasked256,
                ssa.OpAMD64VPMOVZXWDMasked512,
-               ssa.OpAMD64VPMOVUSQDMasked128,
+               ssa.OpAMD64VPMOVUSQDMasked128_128,
+               ssa.OpAMD64VPMOVUSQDMasked128_256,
                ssa.OpAMD64VPMOVUSQDMasked256,
                ssa.OpAMD64VPMOVZXBDMasked128,
                ssa.OpAMD64VPMOVZXWDMasked128,
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index 8332af27383415ac6a0ed9d86c5123c6d4f6532f..7ba970ca4273bcfbdb57584c3dab13ff05089349 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
 (CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ConvertToInt8Int16x8 ...) => (VPMOVWB128 ...)
-(ConvertToInt8Int16x16 ...) => (VPMOVWB128 ...)
+(ConvertToInt8Int16x8 ...) => (VPMOVWB128_128 ...)
+(ConvertToInt8Int16x16 ...) => (VPMOVWB128_256 ...)
 (ConvertToInt8Int16x32 ...) => (VPMOVWB256 ...)
-(ConvertToInt8Int32x4 ...) => (VPMOVDB128 ...)
-(ConvertToInt8Int32x8 ...) => (VPMOVDB128 ...)
-(ConvertToInt8Int32x16 ...) => (VPMOVDB128 ...)
-(ConvertToInt8Int64x2 ...) => (VPMOVQB128 ...)
-(ConvertToInt8Int64x4 ...) => (VPMOVQB128 ...)
-(ConvertToInt8Int64x8 ...) => (VPMOVQB128 ...)
-(ConvertToInt8SaturatedInt16x8 ...) => (VPMOVSWB128 ...)
-(ConvertToInt8SaturatedInt16x16 ...) => (VPMOVSWB128 ...)
+(ConvertToInt8Int32x4 ...) => (VPMOVDB128_128 ...)
+(ConvertToInt8Int32x8 ...) => (VPMOVDB128_256 ...)
+(ConvertToInt8Int32x16 ...) => (VPMOVDB128_512 ...)
+(ConvertToInt8Int64x2 ...) => (VPMOVQB128_128 ...)
+(ConvertToInt8Int64x4 ...) => (VPMOVQB128_256 ...)
+(ConvertToInt8Int64x8 ...) => (VPMOVQB128_512 ...)
+(ConvertToInt8SaturatedInt16x8 ...) => (VPMOVSWB128_128 ...)
+(ConvertToInt8SaturatedInt16x16 ...) => (VPMOVSWB128_256 ...)
 (ConvertToInt8SaturatedInt16x32 ...) => (VPMOVSWB256 ...)
-(ConvertToInt8SaturatedInt32x4 ...) => (VPMOVSDB128 ...)
-(ConvertToInt8SaturatedInt32x8 ...) => (VPMOVSDB128 ...)
-(ConvertToInt8SaturatedInt32x16 ...) => (VPMOVSDB128 ...)
-(ConvertToInt8SaturatedInt64x2 ...) => (VPMOVSQB128 ...)
-(ConvertToInt8SaturatedInt64x4 ...) => (VPMOVSQB128 ...)
-(ConvertToInt8SaturatedInt64x8 ...) => (VPMOVSQB128 ...)
+(ConvertToInt8SaturatedInt32x4 ...) => (VPMOVSDB128_128 ...)
+(ConvertToInt8SaturatedInt32x8 ...) => (VPMOVSDB128_256 ...)
+(ConvertToInt8SaturatedInt32x16 ...) => (VPMOVSDB128_512 ...)
+(ConvertToInt8SaturatedInt64x2 ...) => (VPMOVSQB128_128 ...)
+(ConvertToInt8SaturatedInt64x4 ...) => (VPMOVSQB128_256 ...)
+(ConvertToInt8SaturatedInt64x8 ...) => (VPMOVSQB128_512 ...)
 (ConvertToInt16Int8x16 ...) => (VPMOVSXBW256 ...)
 (ConvertToInt16Int8x32 ...) => (VPMOVSXBW512 ...)
-(ConvertToInt16Int32x4 ...) => (VPMOVDW128 ...)
-(ConvertToInt16Int32x8 ...) => (VPMOVDW128 ...)
+(ConvertToInt16Int32x4 ...) => (VPMOVDW128_128 ...)
+(ConvertToInt16Int32x8 ...) => (VPMOVDW128_256 ...)
 (ConvertToInt16Int32x16 ...) => (VPMOVDW256 ...)
-(ConvertToInt16Int64x2 ...) => (VPMOVQW128 ...)
-(ConvertToInt16Int64x4 ...) => (VPMOVQW128 ...)
-(ConvertToInt16Int64x8 ...) => (VPMOVQW128 ...)
-(ConvertToInt16SaturatedInt32x4 ...) => (VPMOVSDW128 ...)
-(ConvertToInt16SaturatedInt32x8 ...) => (VPMOVSDW128 ...)
+(ConvertToInt16Int64x2 ...) => (VPMOVQW128_128 ...)
+(ConvertToInt16Int64x4 ...) => (VPMOVQW128_256 ...)
+(ConvertToInt16Int64x8 ...) => (VPMOVQW128_512 ...)
+(ConvertToInt16SaturatedInt32x4 ...) => (VPMOVSDW128_128 ...)
+(ConvertToInt16SaturatedInt32x8 ...) => (VPMOVSDW128_256 ...)
 (ConvertToInt16SaturatedInt32x16 ...) => (VPMOVSDW256 ...)
-(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128 ...)
-(ConvertToInt16SaturatedInt64x4 ...) => (VPMOVSQW128 ...)
-(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128 ...)
+(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128_128 ...)
+(ConvertToInt16SaturatedInt64x4 ...) => (VPMOVSQW128_256 ...)
+(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128_512 ...)
 (ConvertToInt16SaturatedPackedInt32x4 ...) => (VPACKSSDW128 ...)
 (ConvertToInt16SaturatedPackedInt32x8 ...) => (VPACKSSDW256 ...)
 (ConvertToInt16SaturatedPackedInt32x16 ...) => (VPACKSSDW512 ...)
 (ConvertToInt32Int8x16 ...) => (VPMOVSXBD512 ...)
 (ConvertToInt32Int16x8 ...) => (VPMOVSXWD256 ...)
 (ConvertToInt32Int16x16 ...) => (VPMOVSXWD512 ...)
-(ConvertToInt32Int64x2 ...) => (VPMOVQD128 ...)
-(ConvertToInt32Int64x4 ...) => (VPMOVQD128 ...)
+(ConvertToInt32Int64x2 ...) => (VPMOVQD128_128 ...)
+(ConvertToInt32Int64x4 ...) => (VPMOVQD128_256 ...)
 (ConvertToInt32Int64x8 ...) => (VPMOVQD256 ...)
-(ConvertToInt32SaturatedInt64x2 ...) => (VPMOVSQD128 ...)
-(ConvertToInt32SaturatedInt64x4 ...) => (VPMOVSQD128 ...)
+(ConvertToInt32SaturatedInt64x2 ...) => (VPMOVSQD128_128 ...)
+(ConvertToInt32SaturatedInt64x4 ...) => (VPMOVSQD128_256 ...)
 (ConvertToInt32SaturatedInt64x8 ...) => (VPMOVSQD256 ...)
 (ConvertToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...)
 (ConvertToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...)
 (ConvertToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...)
 (ConvertToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...)
 (ConvertToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...)
-(ConvertToUint8Uint16x8 ...) => (VPMOVWB128 ...)
-(ConvertToUint8Uint16x16 ...) => (VPMOVWB128 ...)
+(ConvertToUint8Uint16x8 ...) => (VPMOVWB128_128 ...)
+(ConvertToUint8Uint16x16 ...) => (VPMOVWB128_256 ...)
 (ConvertToUint8Uint16x32 ...) => (VPMOVWB256 ...)
-(ConvertToUint8Uint32x4 ...) => (VPMOVDB128 ...)
-(ConvertToUint8Uint32x8 ...) => (VPMOVDB128 ...)
-(ConvertToUint8Uint32x16 ...) => (VPMOVDB128 ...)
-(ConvertToUint8Uint64x2 ...) => (VPMOVQB128 ...)
-(ConvertToUint8Uint64x4 ...) => (VPMOVQB128 ...)
-(ConvertToUint8Uint64x8 ...) => (VPMOVQB128 ...)
-(ConvertToUint8SaturatedUint16x8 ...) => (VPMOVUSWB128 ...)
-(ConvertToUint8SaturatedUint16x16 ...) => (VPMOVUSWB128 ...)
+(ConvertToUint8Uint32x4 ...) => (VPMOVDB128_128 ...)
+(ConvertToUint8Uint32x8 ...) => (VPMOVDB128_256 ...)
+(ConvertToUint8Uint32x16 ...) => (VPMOVDB128_512 ...)
+(ConvertToUint8Uint64x2 ...) => (VPMOVQB128_128 ...)
+(ConvertToUint8Uint64x4 ...) => (VPMOVQB128_256 ...)
+(ConvertToUint8Uint64x8 ...) => (VPMOVQB128_512 ...)
+(ConvertToUint8SaturatedUint16x8 ...) => (VPMOVUSWB128_128 ...)
+(ConvertToUint8SaturatedUint16x16 ...) => (VPMOVUSWB128_256 ...)
 (ConvertToUint8SaturatedUint16x32 ...) => (VPMOVUSWB256 ...)
-(ConvertToUint8SaturatedUint32x4 ...) => (VPMOVUSDB128 ...)
-(ConvertToUint8SaturatedUint32x8 ...) => (VPMOVUSDB128 ...)
-(ConvertToUint8SaturatedUint32x16 ...) => (VPMOVUSDB128 ...)
-(ConvertToUint8SaturatedUint64x2 ...) => (VPMOVUSQB128 ...)
-(ConvertToUint8SaturatedUint64x4 ...) => (VPMOVUSQB128 ...)
-(ConvertToUint8SaturatedUint64x8 ...) => (VPMOVUSQB128 ...)
+(ConvertToUint8SaturatedUint32x4 ...) => (VPMOVUSDB128_128 ...)
+(ConvertToUint8SaturatedUint32x8 ...) => (VPMOVUSDB128_256 ...)
+(ConvertToUint8SaturatedUint32x16 ...) => (VPMOVUSDB128_512 ...)
+(ConvertToUint8SaturatedUint64x2 ...) => (VPMOVUSQB128_128 ...)
+(ConvertToUint8SaturatedUint64x4 ...) => (VPMOVUSQB128_256 ...)
+(ConvertToUint8SaturatedUint64x8 ...) => (VPMOVUSQB128_512 ...)
 (ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...)
 (ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...)
-(ConvertToUint16Uint32x4 ...) => (VPMOVDW128 ...)
-(ConvertToUint16Uint32x8 ...) => (VPMOVDW128 ...)
+(ConvertToUint16Uint32x4 ...) => (VPMOVDW128_128 ...)
+(ConvertToUint16Uint32x8 ...) => (VPMOVDW128_256 ...)
 (ConvertToUint16Uint32x16 ...) => (VPMOVDW256 ...)
-(ConvertToUint16Uint64x2 ...) => (VPMOVQW128 ...)
-(ConvertToUint16Uint64x4 ...) => (VPMOVQW128 ...)
-(ConvertToUint16Uint64x8 ...) => (VPMOVQW128 ...)
-(ConvertToUint16SaturatedUint32x4 ...) => (VPMOVUSDW128 ...)
-(ConvertToUint16SaturatedUint32x8 ...) => (VPMOVUSDW128 ...)
+(ConvertToUint16Uint64x2 ...) => (VPMOVQW128_128 ...)
+(ConvertToUint16Uint64x4 ...) => (VPMOVQW128_256 ...)
+(ConvertToUint16Uint64x8 ...) => (VPMOVQW128_512 ...)
+(ConvertToUint16SaturatedUint32x4 ...) => (VPMOVUSDW128_128 ...)
+(ConvertToUint16SaturatedUint32x8 ...) => (VPMOVUSDW128_256 ...)
 (ConvertToUint16SaturatedUint32x16 ...) => (VPMOVUSDW256 ...)
-(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128 ...)
-(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128 ...)
-(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128 ...)
+(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128_128 ...)
+(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128_256 ...)
+(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128_512 ...)
 (ConvertToUint16SaturatedPackedUint32x4 ...) => (VPACKUSDW128 ...)
 (ConvertToUint16SaturatedPackedUint32x8 ...) => (VPACKUSDW256 ...)
 (ConvertToUint16SaturatedPackedUint32x16 ...) => (VPACKUSDW512 ...)
 (ConvertToUint32Uint8x16 ...) => (VPMOVZXBD512 ...)
 (ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...)
 (ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...)
-(ConvertToUint32Uint64x2 ...) => (VPMOVQD128 ...)
-(ConvertToUint32Uint64x4 ...) => (VPMOVQD128 ...)
+(ConvertToUint32Uint64x2 ...) => (VPMOVQD128_128 ...)
+(ConvertToUint32Uint64x4 ...) => (VPMOVQD128_256 ...)
 (ConvertToUint32Uint64x8 ...) => (VPMOVQD256 ...)
-(ConvertToUint32SaturatedUint64x2 ...) => (VPMOVUSQD128 ...)
-(ConvertToUint32SaturatedUint64x4 ...) => (VPMOVUSQD128 ...)
+(ConvertToUint32SaturatedUint64x2 ...) => (VPMOVUSQD128_128 ...)
+(ConvertToUint32SaturatedUint64x4 ...) => (VPMOVUSQD128_256 ...)
 (ConvertToUint32SaturatedUint64x8 ...) => (VPMOVUSQD256 ...)
 (ConvertToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...)
 (ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...)
 (VMOVDQU64Masked128 (VREDUCEPD128 [a] x) mask) => (VREDUCEPDMasked128 [a] x mask)
 (VMOVDQU64Masked256 (VREDUCEPD256 [a] x) mask) => (VREDUCEPDMasked256 [a] x mask)
 (VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask)
-(VMOVDQU16Masked128 (VPMOVWB128 x) mask) => (VPMOVWBMasked128 x mask)
+(VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) => (VPMOVWBMasked128_128 x mask)
+(VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) => (VPMOVWBMasked128_256 x mask)
 (VMOVDQU16Masked256 (VPMOVWB256 x) mask) => (VPMOVWBMasked256 x mask)
-(VMOVDQU32Masked128 (VPMOVDB128 x) mask) => (VPMOVDBMasked128 x mask)
-(VMOVDQU64Masked128 (VPMOVQB128 x) mask) => (VPMOVQBMasked128 x mask)
-(VMOVDQU16Masked128 (VPMOVSWB128 x) mask) => (VPMOVSWBMasked128 x mask)
+(VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) => (VPMOVDBMasked128_128 x mask)
+(VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) => (VPMOVDBMasked128_256 x mask)
+(VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) => (VPMOVDBMasked128_512 x mask)
+(VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) => (VPMOVQBMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) => (VPMOVQBMasked128_256 x mask)
+(VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) => (VPMOVQBMasked128_512 x mask)
+(VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) => (VPMOVSWBMasked128_128 x mask)
+(VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) => (VPMOVSWBMasked128_256 x mask)
 (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256 x mask)
-(VMOVDQU32Masked128 (VPMOVSDB128 x) mask) => (VPMOVSDBMasked128 x mask)
-(VMOVDQU64Masked128 (VPMOVSQB128 x) mask) => (VPMOVSQBMasked128 x mask)
+(VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) => (VPMOVSDBMasked128_128 x mask)
+(VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) => (VPMOVSDBMasked128_256 x mask)
+(VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512 x mask)
+(VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) => (VPMOVSQBMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) => (VPMOVSQBMasked128_256 x mask)
+(VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512 x mask)
 (VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) => (VPMOVSXBWMasked256 x mask)
 (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask)
-(VMOVDQU32Masked128 (VPMOVDW128 x) mask) => (VPMOVDWMasked128 x mask)
+(VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) => (VPMOVDWMasked128_128 x mask)
+(VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) => (VPMOVDWMasked128_256 x mask)
 (VMOVDQU32Masked256 (VPMOVDW256 x) mask) => (VPMOVDWMasked256 x mask)
-(VMOVDQU64Masked128 (VPMOVQW128 x) mask) => (VPMOVQWMasked128 x mask)
-(VMOVDQU32Masked128 (VPMOVSDW128 x) mask) => (VPMOVSDWMasked128 x mask)
+(VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) => (VPMOVQWMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) => (VPMOVQWMasked128_256 x mask)
+(VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) => (VPMOVQWMasked128_512 x mask)
+(VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) => (VPMOVSDWMasked128_128 x mask)
+(VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) => (VPMOVSDWMasked128_256 x mask)
 (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256 x mask)
-(VMOVDQU64Masked128 (VPMOVSQW128 x) mask) => (VPMOVSQWMasked128 x mask)
+(VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) => (VPMOVSQWMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) => (VPMOVSQWMasked128_256 x mask)
+(VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512 x mask)
 (VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask)
 (VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) => (VPACKSSDWMasked256 x y mask)
 (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask)
 (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask)
 (VMOVDQU16Masked256 (VPMOVSXWD256 x) mask) => (VPMOVSXWDMasked256 x mask)
 (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask)
-(VMOVDQU64Masked128 (VPMOVQD128 x) mask) => (VPMOVQDMasked128 x mask)
+(VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) => (VPMOVQDMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) => (VPMOVQDMasked128_256 x mask)
 (VMOVDQU64Masked256 (VPMOVQD256 x) mask) => (VPMOVQDMasked256 x mask)
-(VMOVDQU64Masked128 (VPMOVSQD128 x) mask) => (VPMOVSQDMasked128 x mask)
+(VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask)
 (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask)
 (VMOVDQU8Masked128 (VPMOVSXBD128 x) mask) => (VPMOVSXBDMasked128 x mask)
 (VMOVDQU16Masked128 (VPMOVSXWD128 x) mask) => (VPMOVSXWDMasked128 x mask)
 (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask)
 (VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) => (VPMOVSXBQMasked256 x mask)
 (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask)
-(VMOVDQU16Masked128 (VPMOVUSWB128 x) mask) => (VPMOVUSWBMasked128 x mask)
+(VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) => (VPMOVUSWBMasked128_128 x mask)
+(VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) => (VPMOVUSWBMasked128_256 x mask)
 (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask)
-(VMOVDQU32Masked128 (VPMOVUSDB128 x) mask) => (VPMOVUSDBMasked128 x mask)
-(VMOVDQU64Masked128 (VPMOVUSQB128 x) mask) => (VPMOVUSQBMasked128 x mask)
+(VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) => (VPMOVUSDBMasked128_128 x mask)
+(VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) => (VPMOVUSDBMasked128_256 x mask)
+(VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512 x mask)
+(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask)
+(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask)
 (VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) => (VPMOVZXBWMasked256 x mask)
 (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask)
-(VMOVDQU32Masked128 (VPMOVUSDW128 x) mask) => (VPMOVUSDWMasked128 x mask)
+(VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) => (VPMOVUSDWMasked128_128 x mask)
+(VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) => (VPMOVUSDWMasked128_256 x mask)
 (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256 x mask)
-(VMOVDQU64Masked128 (VPMOVUSQW128 x) mask) => (VPMOVUSQWMasked128 x mask)
+(VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) => (VPMOVUSQWMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) => (VPMOVUSQWMasked128_256 x mask)
+(VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512 x mask)
 (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask)
 (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask)
 (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask)
 (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask)
 (VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) => (VPMOVZXWDMasked256 x mask)
 (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask)
-(VMOVDQU64Masked128 (VPMOVUSQD128 x) mask) => (VPMOVUSQDMasked128 x mask)
+(VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) => (VPMOVUSQDMasked128_128 x mask)
+(VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) => (VPMOVUSQDMasked128_256 x mask)
 (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256 x mask)
 (VMOVDQU8Masked128 (VPMOVZXBD128 x) mask) => (VPMOVZXBDMasked128 x mask)
 (VMOVDQU16Masked128 (VPMOVZXWD128 x) mask) => (VPMOVZXWDMasked128 x mask)
 (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) => (VPSRAQMasked128const [a] x mask)
 (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) => (VPSRAQMasked256const [a] x mask)
 (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512const [a] x mask)
-(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask)
-(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask)
-(VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512Merging dst x mask)
-(VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) => (VPMAXSWMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMINUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) => (VPMULHWMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPROLQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMAXSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPADDSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPADDUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VBROADCASTSS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVSXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMINSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VMULPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
 (VPBLENDMBMasked512 dst (VGF2P8MULB512 x y) mask) => (VGF2P8MULBMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) => (VMAXPSMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPOPCNTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VSUBPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPSUBUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPMOVUSDB128 x) mask) => (VPMOVUSDBMasked128Merging dst x mask)
-(VPBLENDVB256 dst (VPMAXUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) => (VRSQRT14PSMasked512Merging dst x mask)
-(VPBLENDVB256 dst (VPROLD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) => (VPROLQMasked512Merging dst [a] x mask)
-(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVUSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMULLW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask)
+(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) => (VPADDSBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) => (VPADDUSBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) => (VPAVGBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) => (VPMAXSBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) => (VPMAXUBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) => (VPMINSBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) => (VPMINUBMasked512Merging dst x y mask)
 (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) => (VPOPCNTBMasked512Merging dst x mask)
-(VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) => (VPSHUFBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) => (VPSUBBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) => (VPSUBSBMasked512Merging dst x y mask)
+(VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) => (VPSUBUSBMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VADDPS512 x y) mask) => (VADDPSMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) => (VDIVPSMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) => (VMAXPSMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VMINPS512 x y) mask) => (VMINPSMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VMULPS512 x y) mask) => (VMULPSMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPABSD512 x) mask) => (VPABSDMasked512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPADDD512 x y) mask) => (VPADDDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) => (VPANDDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) => (VPMAXSDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) => (VPMAXUDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) => (VPMINSDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) => (VPMINUDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPMOVDB128_512 x) mask) => (VPMOVDBMasked128_512Merging dst x mask)
 (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask)
-(VPBLENDMQMasked512 dst (VPMOVUSQB128 x) mask) => (VPMOVUSQBMasked128Merging dst x mask)
-(VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMAXSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMINSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPOPCNTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VPORD512 x y) mask) => (VPORDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) => (VPROLDMasked512Merging dst [a] x mask)
+(VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) => (VPROLVDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) => (VPRORDMasked512Merging dst [a] x mask)
+(VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) => (VPRORVDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) => (VPSHLDDMasked512Merging dst [a] x y mask)
+(VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) => (VPSHRDDMasked512Merging dst [a] x y mask)
+(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask)
+(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask)
+(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask)
+(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask)
 (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) => (VRCP14PSMasked512Merging dst x mask)
-(VPBLENDVB128 dst (VPBROADCASTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) => (VPMOVWBMasked256Merging dst x mask)
-(VPBLENDVB128 dst (VPRORVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSUBUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
 (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512Merging dst [a] x mask)
-(VPBLENDVB256 dst (VPMAXSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VMINPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512Merging dst [a] x mask)
+(VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) => (VRSQRT14PSMasked512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512Merging dst x y mask)
+(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => (VSQRTPSMasked512Merging dst x mask)
+(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) => (VDIVPDMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) => (VMAXPDMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VMINPD512 x y) mask) => (VMINPDMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VMULPD512 x y) mask) => (VMULPDMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) => (VPABSQMasked512Merging dst x mask)
 (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) => (VPADDQMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VBROADCASTSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) => (VPANDQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) => (VPMAXSQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) => (VPMAXUQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) => (VPMINSQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) => (VPMINUQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPMOVQB128_512 x) mask) => (VPMOVQBMasked128_512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) => (VPMOVQDMasked256Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVQW128_512 x) mask) => (VPMOVQWMasked128_512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VPORQ512 x y) mask) => (VPORQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) => (VPROLQMasked512Merging dst [a] x mask)
+(VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) => (VPROLVQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) => (VPRORQMasked512Merging dst [a] x mask)
+(VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) => (VPRORVQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) => (VPSHLDQMasked512Merging dst [a] x y mask)
+(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask)
+(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask)
+(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask)
+(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) => (VRCP14PDMasked512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512Merging dst [a] x mask)
 (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) => (VRNDSCALEPDMasked512Merging dst [a] x mask)
-(VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMINSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPADDSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPACKUSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) => (VRSQRT14PDMasked512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) => (VSCALEFPDMasked512Merging dst x y mask)
+(VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) => (VSQRTPDMasked512Merging dst x mask)
+(VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) => (VSUBPDMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPABSW512 x) mask) => (VPABSWMasked512Merging dst x mask)
+(VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) => (VPADDSWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) => (VPADDUSWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPADDW512 x y) mask) => (VPADDWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) => (VPAVGWMasked512Merging dst x y mask)
 (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPLZCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMAXUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPOPCNTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPROLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) => (VPABSQMasked512Merging dst x mask)
-(VPBLENDVB128 dst (VBROADCASTSD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VMINPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMULHW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) => (VPMADDWDMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) => (VPMAXSWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) => (VPMAXUWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) => (VPMINSWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) => (VPMINUWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256Merging dst x mask)
+(VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256Merging dst x mask)
+(VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) => (VPMOVWBMasked256Merging dst x mask)
+(VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) => (VPMULHUWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) => (VPMULHWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) => (VPMULLWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) => (VPOPCNTWMasked512Merging dst x mask)
 (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) => (VPSHLDWMasked512Merging dst [a] x y mask)
-(VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask)
+(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask)
+(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask)
+(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask)
+(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask)
+(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask)
 (VPBLENDVB128 dst (VADDPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) => (VDIVPSMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VDIVPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPLZCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask)
-(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) => (VPMULHUWMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPRORQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) => (VPSUBSBMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPADDD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVSXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMINSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VADDPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMOVSXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256Merging dst x mask)
-(VPBLENDVB256 dst (VPOPCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPROLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPADDUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMAXSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMINUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMULLQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VSQRTPD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) => (VPMINSWMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) => (VRCP14PDMasked512Merging dst x mask)
-(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPBROADCASTD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMADDWD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPROLD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSUBUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPADDUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPROLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VBROADCASTSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VBROADCASTSD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VBROADCASTSS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VBROADCASTSS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VDIVPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VDIVPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VMAXPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VMAXPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VMINPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VMINPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VMULPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VMULPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPABSB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPABSD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPABSQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPABSW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPACKSSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPACKUSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPADDB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) => (VPROLDMasked512Merging dst [a] x mask)
-(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask)
-(VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) => (VPSUBBMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPADDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPADDD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPADDQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPADDSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPADDSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPADDUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPADDUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPADDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPAVGB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPAVGW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPBROADCASTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMINUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPORD512 x y) mask) => (VPORDMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMINSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMULLD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPRORD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPRORVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) => (VPRORVQMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VMINPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) => (VPSHLDDMasked512Merging dst [a] x y mask)
-(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VSUBPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPADDW512 x y) mask) => (VPADDWMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) => (VPANDQMasked512Merging dst x y mask)
+(VPBLENDVB128 dst (VPBROADCASTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPBROADCASTB512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512Merging dst x y mask)
-(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask)
-(VPBLENDVB128 dst (VRCP14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask)
-(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMOVSXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPBROADCASTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPBROADCASTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPBROADCASTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMOVQB128 x) mask) => (VPMOVQBMasked128Merging dst x mask)
-(VPBLENDVB256 dst (VPACKUSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) => (VPMINSBMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPMULLD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPADDB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPADDD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMADDWD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) => (VPMAXSDMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) => (VPSHLDQMasked512Merging dst [a] x y mask)
-(VPBLENDVB128 dst (VBROADCASTSS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) => (VDIVPDMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VADDPS512 x y) mask) => (VADDPSMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMOVSXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask)
-(VPBLENDVB256 dst (VPMULHUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMULLQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPROLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) => (VPROLVQMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVUSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMAXUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMULLW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPRORD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) => (VPRORQMasked512Merging dst [a] x mask)
-(VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VSUBPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPBROADCASTD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMINUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPRORVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VPMOVSDB128 x) mask) => (VPMOVSDBMasked128Merging dst x mask)
-(VPBLENDVB256 dst (VPMOVUSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) => (VPMAXUBMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) => (VPMINSQMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) => (VSQRTPDMasked512Merging dst x mask)
-(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPSUBUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) => (VPMAXUDMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask)
-(VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VRSQRT14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) => (VPRORDMasked512Merging dst [a] x mask)
-(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPABSW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPADDSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) => (VPADDUSBMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPMOVZXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VMINPD512 x y) mask) => (VMINPDMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) => (VPROLVDMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPSUBW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512Merging dst x mask)
-(VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) => (VPMADDWDMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPROLQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPABSD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPAVGB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) => (VPAVGBMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPBROADCASTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VMAXPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) => (VPMINUBMasked512Merging dst x y mask)
+(VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPBROADCASTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPBROADCASTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPBROADCASTW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPLZCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPLZCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMADDWD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMAXSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMAXSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMAXSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMAXSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMAXUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMAXUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMAXUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMAXUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMINSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMINSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMINSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMINSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMINUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMINUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMINUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VMULPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) => (VMAXPDMasked512Merging dst x y mask)
-(VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) => (VPMAXSBMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMULHUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VMULPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPRORVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMINUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) => (VPRORVDMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VSCALEFPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPABSW512 x) mask) => (VPABSWMasked512Merging dst x mask)
+(VPBLENDVB128 dst (VPMINUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VSCALEFPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPADDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VMULPD512 x y) mask) => (VMULPDMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPORQ512 x y) mask) => (VPORQMasked512Merging dst x y mask)
+(VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPMOVSXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMOVUSQW128 x) mask) => (VPMOVUSQWMasked128Merging dst x mask)
-(VPBLENDVB256 dst (VPMINSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VRSQRT14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPABSQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512Merging dst [a] x mask)
-(VPBLENDVB128 dst (VPMULHW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask)
-(VPBLENDMDMasked512 dst (VPADDD512 x y) mask) => (VPADDDMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512Merging dst x mask)
-(VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSUBSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) => (VPSUBUSBMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPADDSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) => (VPADDUSWMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VMAXPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMAXSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMINSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VMULPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512Merging dst [a] x mask)
-(VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512Merging dst x mask)
-(VPBLENDVB256 dst (VDIVPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMAXSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VMINPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) => (VPSHUFBMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VSCALEFPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) => (VPSHRDDMasked512Merging dst [a] x y mask)
-(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VSQRTPD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPAVGW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VDIVPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VDIVPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMINSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) => (VPOPCNTWMasked512Merging dst x mask)
+(VPBLENDVB128 dst (VPMOVSXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMULHUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMULHW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMULLD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMULLQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPMULLW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPOPCNTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPOPCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask)
-(VPBLENDVB256 dst (VPABSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPMOVDB128 x) mask) => (VPMOVDBMasked128Merging dst x mask)
-(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMINUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) => (VPMINUWMasked512Merging dst x y mask)
 (VPBLENDVB128 dst (VPOPCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) => (VPMOVQDMasked256Merging dst x mask)
-(VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask)
-(VPBLENDVB128 dst (VPAVGB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) => (VPAVGWMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMAXSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMAXUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) => (VPMAXUQMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VMINPS512 x y) mask) => (VMINPSMasked512Merging dst x y mask)
-(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask)
-(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) => (VPANDDMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMOVZXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMAXSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask)
+(VPBLENDVB128 dst (VPOPCNTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPROLD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPROLQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPROLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPROLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPRORD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPRORQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPRORVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPRORVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) => (VPMINSDMasked512Merging dst x y mask)
-(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) => (VSUBPDMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256Merging dst x mask)
-(VPBLENDMQMasked512 dst (VPMOVQW128 x) mask) => (VPMOVQWMasked128Merging dst x mask)
-(VPBLENDVB256 dst (VPMINUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VRCP14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask)
+(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPSLLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPADDQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512Merging dst x mask)
-(VPBLENDVB256 dst (VPMAXUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPRORQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) => (VSCALEFPDMasked512Merging dst x y mask)
+(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSUBSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSUBUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSUBUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VPSUBW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VRCP14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VRSQRT14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VSCALEFPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VSCALEFPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VSQRTPD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VSQRTPS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VSUBPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+(VPBLENDVB128 dst (VSUBPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VADDPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMOVSQW128 x) mask) => (VPMOVSQWMasked128Merging dst x mask)
-(VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) => (VPMAXUWMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) => (VPMINUQMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask)
-(VPBLENDVB256 dst (VPSUBW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VDIVPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VDIVPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VMAXPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VMAXPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VMINPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VMINPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VMULPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VMULPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPABSB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPABSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPABSQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPABSW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVSXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPLZCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPLZCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VMAXPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPACKUSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPADDB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPADDD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPADDQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPADDSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPADDSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPADDUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPADDUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPADDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPAVGB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPAVGW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPACKSSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMOVZXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPOPCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPSUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask)
-(VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) => (VPADDSBMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPBROADCASTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VMAXPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPLZCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPLZCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMADDWD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMAXSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMAXSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMAXSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMAXSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMAXUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMAXUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMAXUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPMAXUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask)
-(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask)
-(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask)
-(VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) => (VPADDSWMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPMOVSQB128 x) mask) => (VPMOVSQBMasked128Merging dst x mask)
-(VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) => (VPMINUDMasked512Merging dst x y mask)
+(VPBLENDVB256 dst (VPMINSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMINSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMINSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMINSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMINUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMINUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMINUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMINUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMOVZXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMULHUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMULHW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMULLD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMULLQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPMULLW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPOPCNTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPOPCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPOPCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPOPCNTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) => (VRSQRT14PDMasked512Merging dst x mask)
-(VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMAXUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPROLD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPROLQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPROLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPROLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPRORD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPRORQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPRORVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPRORVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => (VSQRTPSMasked512Merging dst x mask)
-(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask)
-(VPBLENDVB128 dst (VPABSB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPABSB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPABSQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB256 dst (VPMOVDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) => (VPMAXSQMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VSCALEFPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VSQRTPS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (VPBLENDVB256 dst (VPSUBSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VPABSD512 x) mask) => (VPABSDMasked512Merging dst x mask)
-(VPBLENDVB128 dst (VPBROADCASTW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPBLENDVB128 dst (VPMAXUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(VPBLENDMDMasked512 dst (VMULPS512 x y) mask) => (VMULPSMasked512Merging dst x y mask)
-(VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) => (VPMULLWMasked512Merging dst x y mask)
+(VPBLENDVB256 dst (VPSUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSUBUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSUBUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VPSUBW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VRCP14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VRSQRT14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VSCALEFPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VSCALEFPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VSQRTPD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+(VPBLENDVB256 dst (VSUBPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
 (VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD512load {sym} [off] ptr mem)
 (VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ128load {sym} [off] ptr mem)
 (VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ256load {sym} [off] ptr mem)
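
Each rule in the hunk above folds a VPBLENDVB byte blend of an unmasked AVX-512 result into the corresponding merge-masked op: "compute everywhere, then blend with dst" becomes a single masked instruction, with the VPMOVVec*ToM wrapper converting the byte-vector mask into a K-register predicate. A plain-Go, lane-wise model of why the fold is sound — an illustration only, not compiler or intrinsics code:

// blendmodel.go — models (VPBLENDVB128 dst (VPSUBQ128 x y) mask)
// against (VPSUBQMasked128Merging dst x y mask) on two int64 lanes.
package main

import "fmt"

func main() {
	dst := []int64{10, 20}      // prior contents of the destination vector
	x := []int64{7, 8}
	y := []int64{1, 2}
	mask := []bool{true, false} // per-lane predicate

	// Left-hand side: unmasked VPSUBQ128 computes x-y in every lane,
	// then VPBLENDVB128 selects per lane between dst and that result.
	sub := make([]int64, len(x))
	for i := range sub {
		sub[i] = x[i] - y[i]
	}
	blend := make([]int64, len(dst))
	for i := range blend {
		if mask[i] {
			blend[i] = sub[i]
		} else {
			blend[i] = dst[i]
		}
	}

	// Right-hand side: AVX-512 merge masking computes x-y only in
	// active lanes and keeps dst in inactive ones.
	merged := make([]int64, len(dst))
	for i := range merged {
		if mask[i] {
			merged[i] = x[i] - y[i]
		} else {
			merged[i] = dst[i]
		}
	}

	fmt.Println(blend, merged) // both print [6 20]
}

Both paths produce the same vector lane for lane, which is exactly what licenses replacing the blend with the merging form whenever the CPU has AVX-512.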
index 4f22d8582b2c9309c59ce34ad0a5e44d6345bf1d..4e4f4a4205f28391d83cfdb6f72c44f0eab8d2ec 100644 (file)
@@ -585,37 +585,71 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
                {name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
-               {name: "VPMOVDB128", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVDW128", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDB128_128", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDB128_256", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDB128_512", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDW128_128", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDW128_256", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVDW256", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVQB128", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVQD128", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQB128_128", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQB128_256", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQB128_512", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQD128_128", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQD128_256", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVQD256", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQDMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVQW128", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVSDB128", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVSDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVSDW128", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQW128_128", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQW128_256", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQW128_512", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDB128_128", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDB128_256", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDB128_512", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDW128_128", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDW128_256", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSDW256", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVSDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVSQB128", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVSQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVSQD128", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQB128_128", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQB128_256", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQB128_512", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQD128_128", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQD128_256", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSQD256", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVSQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQDMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVSQW128", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVSQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVSWB128", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQW128_128", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQW128_256", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQW128_512", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSWB128_128", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSWB128_256", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSWB256", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVSWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPMOVSXBD128", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSXBD256", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -653,27 +687,47 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
-               {name: "VPMOVUSDB128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVUSDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVUSDW128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDB128_128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDB128_256", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDB128_512", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDW128_128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDW128_256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSDW256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVUSDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVUSQB128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVUSQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVUSQD128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQB128_128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQB128_256", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQB128_512", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQD128_128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQD128_256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSQD256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVUSQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQDMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVUSQW128", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVUSQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPMOVUSWB128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQW128_128", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQW128_256", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQW128_512", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSWB128_128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSWB128_256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSWB256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVUSWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVUSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVUSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVWB128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVWB128_128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVWB128_256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVWB256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VPMOVWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VPMOVWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPMOVZXBD128", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMOVZXBD256", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -2064,21 +2118,38 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPMINUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMINUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPMINUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec512", resultInArg0: true},
-               {name: "VPMOVDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPMOVQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPMOVQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPMOVSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPMOVSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPMOVSXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVSXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: true},
@@ -2098,16 +2169,26 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPMOVSXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: true},
-               {name: "VPMOVUSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVUSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPMOVUSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVUSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPMOVUSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPMOVUSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVUSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPMOVWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPMOVWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPMOVZXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPMOVZXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: true},
index 4dd7faeebf3dfed39493f2da7d5efac5e8a371cb..1d3875a9be2c78c7156f37967d981a60dee3913f 100644 (file)
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1826,37 +1826,71 @@ const (
        OpAMD64VPMINUWMasked128
        OpAMD64VPMINUWMasked256
        OpAMD64VPMINUWMasked512
-       OpAMD64VPMOVDB128
-       OpAMD64VPMOVDBMasked128
-       OpAMD64VPMOVDW128
+       OpAMD64VPMOVDB128_128
+       OpAMD64VPMOVDB128_256
+       OpAMD64VPMOVDB128_512
+       OpAMD64VPMOVDBMasked128_128
+       OpAMD64VPMOVDBMasked128_256
+       OpAMD64VPMOVDBMasked128_512
+       OpAMD64VPMOVDW128_128
+       OpAMD64VPMOVDW128_256
        OpAMD64VPMOVDW256
-       OpAMD64VPMOVDWMasked128
+       OpAMD64VPMOVDWMasked128_128
+       OpAMD64VPMOVDWMasked128_256
        OpAMD64VPMOVDWMasked256
-       OpAMD64VPMOVQB128
-       OpAMD64VPMOVQBMasked128
-       OpAMD64VPMOVQD128
+       OpAMD64VPMOVQB128_128
+       OpAMD64VPMOVQB128_256
+       OpAMD64VPMOVQB128_512
+       OpAMD64VPMOVQBMasked128_128
+       OpAMD64VPMOVQBMasked128_256
+       OpAMD64VPMOVQBMasked128_512
+       OpAMD64VPMOVQD128_128
+       OpAMD64VPMOVQD128_256
        OpAMD64VPMOVQD256
-       OpAMD64VPMOVQDMasked128
+       OpAMD64VPMOVQDMasked128_128
+       OpAMD64VPMOVQDMasked128_256
        OpAMD64VPMOVQDMasked256
-       OpAMD64VPMOVQW128
-       OpAMD64VPMOVQWMasked128
-       OpAMD64VPMOVSDB128
-       OpAMD64VPMOVSDBMasked128
-       OpAMD64VPMOVSDW128
+       OpAMD64VPMOVQW128_128
+       OpAMD64VPMOVQW128_256
+       OpAMD64VPMOVQW128_512
+       OpAMD64VPMOVQWMasked128_128
+       OpAMD64VPMOVQWMasked128_256
+       OpAMD64VPMOVQWMasked128_512
+       OpAMD64VPMOVSDB128_128
+       OpAMD64VPMOVSDB128_256
+       OpAMD64VPMOVSDB128_512
+       OpAMD64VPMOVSDBMasked128_128
+       OpAMD64VPMOVSDBMasked128_256
+       OpAMD64VPMOVSDBMasked128_512
+       OpAMD64VPMOVSDW128_128
+       OpAMD64VPMOVSDW128_256
        OpAMD64VPMOVSDW256
-       OpAMD64VPMOVSDWMasked128
+       OpAMD64VPMOVSDWMasked128_128
+       OpAMD64VPMOVSDWMasked128_256
        OpAMD64VPMOVSDWMasked256
-       OpAMD64VPMOVSQB128
-       OpAMD64VPMOVSQBMasked128
-       OpAMD64VPMOVSQD128
+       OpAMD64VPMOVSQB128_128
+       OpAMD64VPMOVSQB128_256
+       OpAMD64VPMOVSQB128_512
+       OpAMD64VPMOVSQBMasked128_128
+       OpAMD64VPMOVSQBMasked128_256
+       OpAMD64VPMOVSQBMasked128_512
+       OpAMD64VPMOVSQD128_128
+       OpAMD64VPMOVSQD128_256
        OpAMD64VPMOVSQD256
-       OpAMD64VPMOVSQDMasked128
+       OpAMD64VPMOVSQDMasked128_128
+       OpAMD64VPMOVSQDMasked128_256
        OpAMD64VPMOVSQDMasked256
-       OpAMD64VPMOVSQW128
-       OpAMD64VPMOVSQWMasked128
-       OpAMD64VPMOVSWB128
+       OpAMD64VPMOVSQW128_128
+       OpAMD64VPMOVSQW128_256
+       OpAMD64VPMOVSQW128_512
+       OpAMD64VPMOVSQWMasked128_128
+       OpAMD64VPMOVSQWMasked128_256
+       OpAMD64VPMOVSQWMasked128_512
+       OpAMD64VPMOVSWB128_128
+       OpAMD64VPMOVSWB128_256
        OpAMD64VPMOVSWB256
-       OpAMD64VPMOVSWBMasked128
+       OpAMD64VPMOVSWBMasked128_128
+       OpAMD64VPMOVSWBMasked128_256
        OpAMD64VPMOVSWBMasked256
        OpAMD64VPMOVSXBD128
        OpAMD64VPMOVSXBD256
@@ -1894,27 +1928,47 @@ const (
        OpAMD64VPMOVSXWQMasked128
        OpAMD64VPMOVSXWQMasked256
        OpAMD64VPMOVSXWQMasked512
-       OpAMD64VPMOVUSDB128
-       OpAMD64VPMOVUSDBMasked128
-       OpAMD64VPMOVUSDW128
+       OpAMD64VPMOVUSDB128_128
+       OpAMD64VPMOVUSDB128_256
+       OpAMD64VPMOVUSDB128_512
+       OpAMD64VPMOVUSDBMasked128_128
+       OpAMD64VPMOVUSDBMasked128_256
+       OpAMD64VPMOVUSDBMasked128_512
+       OpAMD64VPMOVUSDW128_128
+       OpAMD64VPMOVUSDW128_256
        OpAMD64VPMOVUSDW256
-       OpAMD64VPMOVUSDWMasked128
+       OpAMD64VPMOVUSDWMasked128_128
+       OpAMD64VPMOVUSDWMasked128_256
        OpAMD64VPMOVUSDWMasked256
-       OpAMD64VPMOVUSQB128
-       OpAMD64VPMOVUSQBMasked128
-       OpAMD64VPMOVUSQD128
+       OpAMD64VPMOVUSQB128_128
+       OpAMD64VPMOVUSQB128_256
+       OpAMD64VPMOVUSQB128_512
+       OpAMD64VPMOVUSQBMasked128_128
+       OpAMD64VPMOVUSQBMasked128_256
+       OpAMD64VPMOVUSQBMasked128_512
+       OpAMD64VPMOVUSQD128_128
+       OpAMD64VPMOVUSQD128_256
        OpAMD64VPMOVUSQD256
-       OpAMD64VPMOVUSQDMasked128
+       OpAMD64VPMOVUSQDMasked128_128
+       OpAMD64VPMOVUSQDMasked128_256
        OpAMD64VPMOVUSQDMasked256
-       OpAMD64VPMOVUSQW128
-       OpAMD64VPMOVUSQWMasked128
-       OpAMD64VPMOVUSWB128
+       OpAMD64VPMOVUSQW128_128
+       OpAMD64VPMOVUSQW128_256
+       OpAMD64VPMOVUSQW128_512
+       OpAMD64VPMOVUSQWMasked128_128
+       OpAMD64VPMOVUSQWMasked128_256
+       OpAMD64VPMOVUSQWMasked128_512
+       OpAMD64VPMOVUSWB128_128
+       OpAMD64VPMOVUSWB128_256
        OpAMD64VPMOVUSWB256
-       OpAMD64VPMOVUSWBMasked128
+       OpAMD64VPMOVUSWBMasked128_128
+       OpAMD64VPMOVUSWBMasked128_256
        OpAMD64VPMOVUSWBMasked256
-       OpAMD64VPMOVWB128
+       OpAMD64VPMOVWB128_128
+       OpAMD64VPMOVWB128_256
        OpAMD64VPMOVWB256
-       OpAMD64VPMOVWBMasked128
+       OpAMD64VPMOVWBMasked128_128
+       OpAMD64VPMOVWBMasked128_256
        OpAMD64VPMOVWBMasked256
        OpAMD64VPMOVZXBD128
        OpAMD64VPMOVZXBD256
@@ -3305,21 +3359,38 @@ const (
        OpAMD64VPMINUWMasked128Merging
        OpAMD64VPMINUWMasked256Merging
        OpAMD64VPMINUWMasked512Merging
-       OpAMD64VPMOVDBMasked128Merging
-       OpAMD64VPMOVDWMasked128Merging
+       OpAMD64VPMOVDBMasked128_128Merging
+       OpAMD64VPMOVDBMasked128_256Merging
+       OpAMD64VPMOVDBMasked128_512Merging
+       OpAMD64VPMOVDWMasked128_128Merging
+       OpAMD64VPMOVDWMasked128_256Merging
        OpAMD64VPMOVDWMasked256Merging
-       OpAMD64VPMOVQBMasked128Merging
-       OpAMD64VPMOVQDMasked128Merging
+       OpAMD64VPMOVQBMasked128_128Merging
+       OpAMD64VPMOVQBMasked128_256Merging
+       OpAMD64VPMOVQBMasked128_512Merging
+       OpAMD64VPMOVQDMasked128_128Merging
+       OpAMD64VPMOVQDMasked128_256Merging
        OpAMD64VPMOVQDMasked256Merging
-       OpAMD64VPMOVQWMasked128Merging
-       OpAMD64VPMOVSDBMasked128Merging
-       OpAMD64VPMOVSDWMasked128Merging
+       OpAMD64VPMOVQWMasked128_128Merging
+       OpAMD64VPMOVQWMasked128_256Merging
+       OpAMD64VPMOVQWMasked128_512Merging
+       OpAMD64VPMOVSDBMasked128_128Merging
+       OpAMD64VPMOVSDBMasked128_256Merging
+       OpAMD64VPMOVSDBMasked128_512Merging
+       OpAMD64VPMOVSDWMasked128_128Merging
+       OpAMD64VPMOVSDWMasked128_256Merging
        OpAMD64VPMOVSDWMasked256Merging
-       OpAMD64VPMOVSQBMasked128Merging
-       OpAMD64VPMOVSQDMasked128Merging
+       OpAMD64VPMOVSQBMasked128_128Merging
+       OpAMD64VPMOVSQBMasked128_256Merging
+       OpAMD64VPMOVSQBMasked128_512Merging
+       OpAMD64VPMOVSQDMasked128_128Merging
+       OpAMD64VPMOVSQDMasked128_256Merging
        OpAMD64VPMOVSQDMasked256Merging
-       OpAMD64VPMOVSQWMasked128Merging
-       OpAMD64VPMOVSWBMasked128Merging
+       OpAMD64VPMOVSQWMasked128_128Merging
+       OpAMD64VPMOVSQWMasked128_256Merging
+       OpAMD64VPMOVSQWMasked128_512Merging
+       OpAMD64VPMOVSWBMasked128_128Merging
+       OpAMD64VPMOVSWBMasked128_256Merging
        OpAMD64VPMOVSWBMasked256Merging
        OpAMD64VPMOVSXBDMasked128Merging
        OpAMD64VPMOVSXBDMasked256Merging
@@ -3339,16 +3410,26 @@ const (
        OpAMD64VPMOVSXWQMasked128Merging
        OpAMD64VPMOVSXWQMasked256Merging
        OpAMD64VPMOVSXWQMasked512Merging
-       OpAMD64VPMOVUSDBMasked128Merging
-       OpAMD64VPMOVUSDWMasked128Merging
+       OpAMD64VPMOVUSDBMasked128_128Merging
+       OpAMD64VPMOVUSDBMasked128_256Merging
+       OpAMD64VPMOVUSDBMasked128_512Merging
+       OpAMD64VPMOVUSDWMasked128_128Merging
+       OpAMD64VPMOVUSDWMasked128_256Merging
        OpAMD64VPMOVUSDWMasked256Merging
-       OpAMD64VPMOVUSQBMasked128Merging
-       OpAMD64VPMOVUSQDMasked128Merging
+       OpAMD64VPMOVUSQBMasked128_128Merging
+       OpAMD64VPMOVUSQBMasked128_256Merging
+       OpAMD64VPMOVUSQBMasked128_512Merging
+       OpAMD64VPMOVUSQDMasked128_128Merging
+       OpAMD64VPMOVUSQDMasked128_256Merging
        OpAMD64VPMOVUSQDMasked256Merging
-       OpAMD64VPMOVUSQWMasked128Merging
-       OpAMD64VPMOVUSWBMasked128Merging
+       OpAMD64VPMOVUSQWMasked128_128Merging
+       OpAMD64VPMOVUSQWMasked128_256Merging
+       OpAMD64VPMOVUSQWMasked128_512Merging
+       OpAMD64VPMOVUSWBMasked128_128Merging
+       OpAMD64VPMOVUSWBMasked128_256Merging
        OpAMD64VPMOVUSWBMasked256Merging
-       OpAMD64VPMOVWBMasked128Merging
+       OpAMD64VPMOVWBMasked128_128Merging
+       OpAMD64VPMOVWBMasked128_256Merging
        OpAMD64VPMOVWBMasked256Merging
        OpAMD64VPMOVZXBDMasked128Merging
        OpAMD64VPMOVZXBDMasked256Merging
@@ -29124,7 +29205,7 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVDB128",
+               name:   "VPMOVDB128_128",
                argLen: 1,
                asm:    x86.AVPMOVDB,
                reg: regInfo{
@@ -29137,7 +29218,61 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVDBMasked128",
+               name:   "VPMOVDB128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVDB128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVDBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVDBMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVDBMasked128_512",
                argLen: 2,
                asm:    x86.AVPMOVDB,
                reg: regInfo{
@@ -29151,7 +29286,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVDW128",
+               name:   "VPMOVDW128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVDW128_256",
                argLen: 1,
                asm:    x86.AVPMOVDW,
                reg: regInfo{
@@ -29177,7 +29325,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVDWMasked128",
+               name:   "VPMOVDWMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVDWMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVDW,
                reg: regInfo{
@@ -29205,7 +29367,7 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVQB128",
+               name:   "VPMOVQB128_128",
                argLen: 1,
                asm:    x86.AVPMOVQB,
                reg: regInfo{
@@ -29218,7 +29380,61 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVQBMasked128",
+               name:   "VPMOVQB128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQB128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQBMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQBMasked128_512",
                argLen: 2,
                asm:    x86.AVPMOVQB,
                reg: regInfo{
@@ -29232,7 +29448,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVQD128",
+               name:   "VPMOVQD128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQD128_256",
                argLen: 1,
                asm:    x86.AVPMOVQD,
                reg: regInfo{
@@ -29258,7 +29487,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVQDMasked128",
+               name:   "VPMOVQDMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQDMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVQD,
                reg: regInfo{
@@ -29286,7 +29529,7 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVQW128",
+               name:   "VPMOVQW128_128",
                argLen: 1,
                asm:    x86.AVPMOVQW,
                reg: regInfo{
@@ -29299,7 +29542,33 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVQWMasked128",
+               name:   "VPMOVQW128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQW128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQWMasked128_128",
                argLen: 2,
                asm:    x86.AVPMOVQW,
                reg: regInfo{
@@ -29313,7 +29582,61 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSDB128",
+               name:   "VPMOVQWMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVQWMasked128_512",
+               argLen: 2,
+               asm:    x86.AVPMOVQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSDB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSDB128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSDB128_512",
                argLen: 1,
                asm:    x86.AVPMOVSDB,
                reg: regInfo{
@@ -29326,7 +29649,35 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSDBMasked128",
+               name:   "VPMOVSDBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSDBMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSDBMasked128_512",
                argLen: 2,
                asm:    x86.AVPMOVSDB,
                reg: regInfo{
@@ -29340,7 +29691,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSDW128",
+               name:   "VPMOVSDW128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVSDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSDW128_256",
                argLen: 1,
                asm:    x86.AVPMOVSDW,
                reg: regInfo{
@@ -29366,7 +29730,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSDWMasked128",
+               name:   "VPMOVSDWMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVSDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSDWMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVSDW,
                reg: regInfo{
@@ -29394,7 +29772,7 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSQB128",
+               name:   "VPMOVSQB128_128",
                argLen: 1,
                asm:    x86.AVPMOVSQB,
                reg: regInfo{
@@ -29407,7 +29785,61 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSQBMasked128",
+               name:   "VPMOVSQB128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQB128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQBMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQBMasked128_512",
                argLen: 2,
                asm:    x86.AVPMOVSQB,
                reg: regInfo{
@@ -29421,7 +29853,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSQD128",
+               name:   "VPMOVSQD128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVSQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQD128_256",
                argLen: 1,
                asm:    x86.AVPMOVSQD,
                reg: regInfo{
@@ -29447,7 +29892,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSQDMasked128",
+               name:   "VPMOVSQDMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVSQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQDMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVSQD,
                reg: regInfo{
@@ -29475,7 +29934,7 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSQW128",
+               name:   "VPMOVSQW128_128",
                argLen: 1,
                asm:    x86.AVPMOVSQW,
                reg: regInfo{
@@ -29488,7 +29947,47 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSQWMasked128",
+               name:   "VPMOVSQW128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQW128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQWMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSQWMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVSQW,
                reg: regInfo{
@@ -29502,7 +30001,34 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSWB128",
+               name:   "VPMOVSQWMasked128_512",
+               argLen: 2,
+               asm:    x86.AVPMOVSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSWB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSWB128_256",
                argLen: 1,
                asm:    x86.AVPMOVSWB,
                reg: regInfo{
@@ -29528,7 +30054,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVSWBMasked128",
+               name:   "VPMOVSWBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVSWBMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVSWB,
                reg: regInfo{
@@ -30042,7 +30582,33 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSDB128",
+               name:   "VPMOVUSDB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDB128_256",
+               argLen: 1,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDB128_512",
                argLen: 1,
                asm:    x86.AVPMOVUSDB,
                reg: regInfo{
@@ -30055,7 +30621,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSDBMasked128",
+               name:   "VPMOVUSDBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDBMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVUSDB,
                reg: regInfo{
@@ -30069,7 +30649,34 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSDW128",
+               name:   "VPMOVUSDBMasked128_512",
+               argLen: 2,
+               asm:    x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDW128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDW128_256",
                argLen: 1,
                asm:    x86.AVPMOVUSDW,
                reg: regInfo{
@@ -30095,7 +30702,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSDWMasked128",
+               name:   "VPMOVUSDWMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVUSDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSDWMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVUSDW,
                reg: regInfo{
@@ -30123,7 +30744,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSQB128",
+               name:   "VPMOVUSQB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQB128_256",
                argLen: 1,
                asm:    x86.AVPMOVUSQB,
                reg: regInfo{
@@ -30136,7 +30770,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSQBMasked128",
+               name:   "VPMOVUSQB128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQBMasked128_128",
                argLen: 2,
                asm:    x86.AVPMOVUSQB,
                reg: regInfo{
@@ -30150,7 +30797,48 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSQD128",
+               name:   "VPMOVUSQBMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQBMasked128_512",
+               argLen: 2,
+               asm:    x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQD128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQD128_256",
                argLen: 1,
                asm:    x86.AVPMOVUSQD,
                reg: regInfo{
@@ -30176,7 +30864,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSQDMasked128",
+               name:   "VPMOVUSQDMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVUSQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQDMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVUSQD,
                reg: regInfo{
@@ -30204,7 +30906,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSQW128",
+               name:   "VPMOVUSQW128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQW128_256",
                argLen: 1,
                asm:    x86.AVPMOVUSQW,
                reg: regInfo{
@@ -30217,7 +30932,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSQWMasked128",
+               name:   "VPMOVUSQW128_512",
+               argLen: 1,
+               asm:    x86.AVPMOVUSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQWMasked128_128",
                argLen: 2,
                asm:    x86.AVPMOVUSQW,
                reg: regInfo{
@@ -30231,7 +30959,48 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSWB128",
+               name:   "VPMOVUSQWMasked128_256",
+               argLen: 2,
+               asm:    x86.AVPMOVUSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSQWMasked128_512",
+               argLen: 2,
+               asm:    x86.AVPMOVUSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSWB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSWB128_256",
                argLen: 1,
                asm:    x86.AVPMOVUSWB,
                reg: regInfo{
@@ -30257,7 +31026,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVUSWBMasked128",
+               name:   "VPMOVUSWBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVUSWBMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVUSWB,
                reg: regInfo{
@@ -30285,7 +31068,20 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVWB128",
+               name:   "VPMOVWB128_128",
+               argLen: 1,
+               asm:    x86.AVPMOVWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVWB128_256",
                argLen: 1,
                asm:    x86.AVPMOVWB,
                reg: regInfo{
@@ -30311,7 +31107,21 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:   "VPMOVWBMasked128",
+               name:   "VPMOVWBMasked128_128",
+               argLen: 2,
+               asm:    x86.AVPMOVWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VPMOVWBMasked128_256",
                argLen: 2,
                asm:    x86.AVPMOVWB,
                reg: regInfo{
@@ -52247,7 +53057,7 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVDBMasked128Merging",
+               name:         "VPMOVDBMasked128_128Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVDB,
@@ -52263,7 +53073,55 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVDWMasked128Merging",
+               name:         "VPMOVDBMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVDBMasked128_512Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVDWMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVDWMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVDW,
@@ -52295,7 +53153,39 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVQBMasked128Merging",
+               name:         "VPMOVQBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVQBMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVQBMasked128_512Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVQB,
@@ -52311,7 +53201,23 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVQDMasked128Merging",
+               name:         "VPMOVQDMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVQDMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVQD,
@@ -52343,7 +53249,7 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVQWMasked128Merging",
+               name:         "VPMOVQWMasked128_128Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVQW,
@@ -52359,7 +53265,39 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVSDBMasked128Merging",
+               name:         "VPMOVQWMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVQWMasked128_512Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSDBMasked128_128Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVSDB,
@@ -52375,7 +53313,55 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVSDWMasked128Merging",
+               name:         "VPMOVSDBMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSDBMasked128_512Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSDWMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSDWMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVSDW,
@@ -52407,7 +53393,23 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVSQBMasked128Merging",
+               name:         "VPMOVSQBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSQBMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVSQB,
@@ -52423,7 +53425,39 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVSQDMasked128Merging",
+               name:         "VPMOVSQBMasked128_512Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSQDMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSQDMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVSQD,
@@ -52455,7 +53489,23 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVSQWMasked128Merging",
+               name:         "VPMOVSQWMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSQWMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVSQW,
@@ -52471,7 +53521,39 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVSWBMasked128Merging",
+               name:         "VPMOVSQWMasked128_512Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSWBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVSWBMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVSWB,
@@ -52791,7 +53873,39 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVUSDBMasked128Merging",
+               name:         "VPMOVUSDBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSDBMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSDBMasked128_512Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVUSDB,
@@ -52807,7 +53921,23 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVUSDWMasked128Merging",
+               name:         "VPMOVUSDWMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSDW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSDWMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVUSDW,
@@ -52839,7 +53969,39 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVUSQBMasked128Merging",
+               name:         "VPMOVUSQBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSQBMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSQB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSQBMasked128_512Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVUSQB,
@@ -52855,7 +54017,23 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVUSQDMasked128Merging",
+               name:         "VPMOVUSQDMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSQD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSQDMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVUSQD,
@@ -52887,7 +54065,7 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVUSQWMasked128Merging",
+               name:         "VPMOVUSQWMasked128_128Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVUSQW,
@@ -52903,7 +54081,55 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVUSWBMasked128Merging",
+               name:         "VPMOVUSQWMasked128_256Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSQWMasked128_512Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSQW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSWBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVUSWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVUSWBMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVUSWB,
@@ -52935,7 +54161,23 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:         "VPMOVWBMasked128Merging",
+               name:         "VPMOVWBMasked128_128Merging",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPMOVWB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPMOVWBMasked128_256Merging",
                argLen:       3,
                resultInArg0: true,
                asm:          x86.AVPMOVWB,
index bf0e79de0bfe8c845f5af2332962c32ae952652d..974af9d842da17116cfc8a385d16fe477053b80b 100644 (file)
@@ -2574,19 +2574,19 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPMOVDW256
                return true
        case OpConvertToInt16Int32x4:
-               v.Op = OpAMD64VPMOVDW128
+               v.Op = OpAMD64VPMOVDW128_128
                return true
        case OpConvertToInt16Int32x8:
-               v.Op = OpAMD64VPMOVDW128
+               v.Op = OpAMD64VPMOVDW128_256
                return true
        case OpConvertToInt16Int64x2:
-               v.Op = OpAMD64VPMOVQW128
+               v.Op = OpAMD64VPMOVQW128_128
                return true
        case OpConvertToInt16Int64x4:
-               v.Op = OpAMD64VPMOVQW128
+               v.Op = OpAMD64VPMOVQW128_256
                return true
        case OpConvertToInt16Int64x8:
-               v.Op = OpAMD64VPMOVQW128
+               v.Op = OpAMD64VPMOVQW128_512
                return true
        case OpConvertToInt16Int8x16:
                v.Op = OpAMD64VPMOVSXBW256
@@ -2598,19 +2598,19 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPMOVSDW256
                return true
        case OpConvertToInt16SaturatedInt32x4:
-               v.Op = OpAMD64VPMOVSDW128
+               v.Op = OpAMD64VPMOVSDW128_128
                return true
        case OpConvertToInt16SaturatedInt32x8:
-               v.Op = OpAMD64VPMOVSDW128
+               v.Op = OpAMD64VPMOVSDW128_256
                return true
        case OpConvertToInt16SaturatedInt64x2:
-               v.Op = OpAMD64VPMOVSQW128
+               v.Op = OpAMD64VPMOVSQW128_128
                return true
        case OpConvertToInt16SaturatedInt64x4:
-               v.Op = OpAMD64VPMOVSQW128
+               v.Op = OpAMD64VPMOVSQW128_256
                return true
        case OpConvertToInt16SaturatedInt64x8:
-               v.Op = OpAMD64VPMOVSQW128
+               v.Op = OpAMD64VPMOVSQW128_512
                return true
        case OpConvertToInt16SaturatedPackedInt32x16:
                v.Op = OpAMD64VPACKSSDW512
@@ -2640,10 +2640,10 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPMOVSXWD256
                return true
        case OpConvertToInt32Int64x2:
-               v.Op = OpAMD64VPMOVQD128
+               v.Op = OpAMD64VPMOVQD128_128
                return true
        case OpConvertToInt32Int64x4:
-               v.Op = OpAMD64VPMOVQD128
+               v.Op = OpAMD64VPMOVQD128_256
                return true
        case OpConvertToInt32Int64x8:
                v.Op = OpAMD64VPMOVQD256
@@ -2652,10 +2652,10 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPMOVSXBD512
                return true
        case OpConvertToInt32SaturatedInt64x2:
-               v.Op = OpAMD64VPMOVSQD128
+               v.Op = OpAMD64VPMOVSQD128_128
                return true
        case OpConvertToInt32SaturatedInt64x4:
-               v.Op = OpAMD64VPMOVSQD128
+               v.Op = OpAMD64VPMOVSQD128_256
                return true
        case OpConvertToInt32SaturatedInt64x8:
                v.Op = OpAMD64VPMOVSQD256
@@ -2694,58 +2694,58 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPMOVSXBQ512
                return true
        case OpConvertToInt8Int16x16:
-               v.Op = OpAMD64VPMOVWB128
+               v.Op = OpAMD64VPMOVWB128_256
                return true
        case OpConvertToInt8Int16x32:
                v.Op = OpAMD64VPMOVWB256
                return true
        case OpConvertToInt8Int16x8:
-               v.Op = OpAMD64VPMOVWB128
+               v.Op = OpAMD64VPMOVWB128_128
                return true
        case OpConvertToInt8Int32x16:
-               v.Op = OpAMD64VPMOVDB128
+               v.Op = OpAMD64VPMOVDB128_512
                return true
        case OpConvertToInt8Int32x4:
-               v.Op = OpAMD64VPMOVDB128
+               v.Op = OpAMD64VPMOVDB128_128
                return true
        case OpConvertToInt8Int32x8:
-               v.Op = OpAMD64VPMOVDB128
+               v.Op = OpAMD64VPMOVDB128_256
                return true
        case OpConvertToInt8Int64x2:
-               v.Op = OpAMD64VPMOVQB128
+               v.Op = OpAMD64VPMOVQB128_128
                return true
        case OpConvertToInt8Int64x4:
-               v.Op = OpAMD64VPMOVQB128
+               v.Op = OpAMD64VPMOVQB128_256
                return true
        case OpConvertToInt8Int64x8:
-               v.Op = OpAMD64VPMOVQB128
+               v.Op = OpAMD64VPMOVQB128_512
                return true
        case OpConvertToInt8SaturatedInt16x16:
-               v.Op = OpAMD64VPMOVSWB128
+               v.Op = OpAMD64VPMOVSWB128_256
                return true
        case OpConvertToInt8SaturatedInt16x32:
                v.Op = OpAMD64VPMOVSWB256
                return true
        case OpConvertToInt8SaturatedInt16x8:
-               v.Op = OpAMD64VPMOVSWB128
+               v.Op = OpAMD64VPMOVSWB128_128
                return true
        case OpConvertToInt8SaturatedInt32x16:
-               v.Op = OpAMD64VPMOVSDB128
+               v.Op = OpAMD64VPMOVSDB128_512
                return true
        case OpConvertToInt8SaturatedInt32x4:
-               v.Op = OpAMD64VPMOVSDB128
+               v.Op = OpAMD64VPMOVSDB128_128
                return true
        case OpConvertToInt8SaturatedInt32x8:
-               v.Op = OpAMD64VPMOVSDB128
+               v.Op = OpAMD64VPMOVSDB128_256
                return true
        case OpConvertToInt8SaturatedInt64x2:
-               v.Op = OpAMD64VPMOVSQB128
+               v.Op = OpAMD64VPMOVSQB128_128
                return true
        case OpConvertToInt8SaturatedInt64x4:
-               v.Op = OpAMD64VPMOVSQB128
+               v.Op = OpAMD64VPMOVSQB128_256
                return true
        case OpConvertToInt8SaturatedInt64x8:
-               v.Op = OpAMD64VPMOVSQB128
+               v.Op = OpAMD64VPMOVSQB128_512
                return true
        case OpConvertToUint16SaturatedPackedUint32x16:
                v.Op = OpAMD64VPACKUSDW512
@@ -2760,37 +2760,37 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPMOVUSDW256
                return true
        case OpConvertToUint16SaturatedUint32x4:
-               v.Op = OpAMD64VPMOVUSDW128
+               v.Op = OpAMD64VPMOVUSDW128_128
                return true
        case OpConvertToUint16SaturatedUint32x8:
-               v.Op = OpAMD64VPMOVUSDW128
+               v.Op = OpAMD64VPMOVUSDW128_256
                return true
        case OpConvertToUint16SaturatedUint64x2:
-               v.Op = OpAMD64VPMOVUSQW128
+               v.Op = OpAMD64VPMOVUSQW128_128
                return true
        case OpConvertToUint16SaturatedUint64x4:
-               v.Op = OpAMD64VPMOVUSQW128
+               v.Op = OpAMD64VPMOVUSQW128_256
                return true
        case OpConvertToUint16SaturatedUint64x8:
-               v.Op = OpAMD64VPMOVUSQW128
+               v.Op = OpAMD64VPMOVUSQW128_512
                return true
        case OpConvertToUint16Uint32x16:
                v.Op = OpAMD64VPMOVDW256
                return true
        case OpConvertToUint16Uint32x4:
-               v.Op = OpAMD64VPMOVDW128
+               v.Op = OpAMD64VPMOVDW128_128
                return true
        case OpConvertToUint16Uint32x8:
-               v.Op = OpAMD64VPMOVDW128
+               v.Op = OpAMD64VPMOVDW128_256
                return true
        case OpConvertToUint16Uint64x2:
-               v.Op = OpAMD64VPMOVQW128
+               v.Op = OpAMD64VPMOVQW128_128
                return true
        case OpConvertToUint16Uint64x4:
-               v.Op = OpAMD64VPMOVQW128
+               v.Op = OpAMD64VPMOVQW128_256
                return true
        case OpConvertToUint16Uint64x8:
-               v.Op = OpAMD64VPMOVQW128
+               v.Op = OpAMD64VPMOVQW128_512
                return true
        case OpConvertToUint16Uint8x16:
                v.Op = OpAMD64VPMOVZXBW256
@@ -2811,10 +2811,10 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VCVTPS2UDQ256
                return true
        case OpConvertToUint32SaturatedUint64x2:
-               v.Op = OpAMD64VPMOVUSQD128
+               v.Op = OpAMD64VPMOVUSQD128_128
                return true
        case OpConvertToUint32SaturatedUint64x4:
-               v.Op = OpAMD64VPMOVUSQD128
+               v.Op = OpAMD64VPMOVUSQD128_256
                return true
        case OpConvertToUint32SaturatedUint64x8:
                v.Op = OpAMD64VPMOVUSQD256
@@ -2826,10 +2826,10 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPMOVZXWD256
                return true
        case OpConvertToUint32Uint64x2:
-               v.Op = OpAMD64VPMOVQD128
+               v.Op = OpAMD64VPMOVQD128_128
                return true
        case OpConvertToUint32Uint64x4:
-               v.Op = OpAMD64VPMOVQD128
+               v.Op = OpAMD64VPMOVQD128_256
                return true
        case OpConvertToUint32Uint64x8:
                v.Op = OpAMD64VPMOVQD256
@@ -2877,58 +2877,58 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPMOVZXBQ512
                return true
        case OpConvertToUint8SaturatedUint16x16:
-               v.Op = OpAMD64VPMOVUSWB128
+               v.Op = OpAMD64VPMOVUSWB128_256
                return true
        case OpConvertToUint8SaturatedUint16x32:
                v.Op = OpAMD64VPMOVUSWB256
                return true
        case OpConvertToUint8SaturatedUint16x8:
-               v.Op = OpAMD64VPMOVUSWB128
+               v.Op = OpAMD64VPMOVUSWB128_128
                return true
        case OpConvertToUint8SaturatedUint32x16:
-               v.Op = OpAMD64VPMOVUSDB128
+               v.Op = OpAMD64VPMOVUSDB128_512
                return true
        case OpConvertToUint8SaturatedUint32x4:
-               v.Op = OpAMD64VPMOVUSDB128
+               v.Op = OpAMD64VPMOVUSDB128_128
                return true
        case OpConvertToUint8SaturatedUint32x8:
-               v.Op = OpAMD64VPMOVUSDB128
+               v.Op = OpAMD64VPMOVUSDB128_256
                return true
        case OpConvertToUint8SaturatedUint64x2:
-               v.Op = OpAMD64VPMOVUSQB128
+               v.Op = OpAMD64VPMOVUSQB128_128
                return true
        case OpConvertToUint8SaturatedUint64x4:
-               v.Op = OpAMD64VPMOVUSQB128
+               v.Op = OpAMD64VPMOVUSQB128_256
                return true
        case OpConvertToUint8SaturatedUint64x8:
-               v.Op = OpAMD64VPMOVUSQB128
+               v.Op = OpAMD64VPMOVUSQB128_512
                return true
        case OpConvertToUint8Uint16x16:
-               v.Op = OpAMD64VPMOVWB128
+               v.Op = OpAMD64VPMOVWB128_256
                return true
        case OpConvertToUint8Uint16x32:
                v.Op = OpAMD64VPMOVWB256
                return true
        case OpConvertToUint8Uint16x8:
-               v.Op = OpAMD64VPMOVWB128
+               v.Op = OpAMD64VPMOVWB128_128
                return true
        case OpConvertToUint8Uint32x16:
-               v.Op = OpAMD64VPMOVDB128
+               v.Op = OpAMD64VPMOVDB128_512
                return true
        case OpConvertToUint8Uint32x4:
-               v.Op = OpAMD64VPMOVDB128
+               v.Op = OpAMD64VPMOVDB128_128
                return true
        case OpConvertToUint8Uint32x8:
-               v.Op = OpAMD64VPMOVDB128
+               v.Op = OpAMD64VPMOVDB128_256
                return true
        case OpConvertToUint8Uint64x2:
-               v.Op = OpAMD64VPMOVQB128
+               v.Op = OpAMD64VPMOVQB128_128
                return true
        case OpConvertToUint8Uint64x4:
-               v.Op = OpAMD64VPMOVQB128
+               v.Op = OpAMD64VPMOVQB128_256
                return true
        case OpConvertToUint8Uint64x8:
-               v.Op = OpAMD64VPMOVQB128
+               v.Op = OpAMD64VPMOVQB128_512
                return true
        case OpCopySignInt16x16:
                v.Op = OpAMD64VPSIGNW256
@@ -31243,27 +31243,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU16Masked128 (VPMOVWB128 x) mask)
-       // result: (VPMOVWBMasked128 x mask)
+       // match: (VMOVDQU16Masked128 (VPMOVWB128_128 x) mask)
+       // result: (VPMOVWBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVWB128 {
+               if v_0.Op != OpAMD64VPMOVWB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVWBMasked128)
+               v.reset(OpAMD64VPMOVWBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU16Masked128 (VPMOVSWB128 x) mask)
-       // result: (VPMOVSWBMasked128 x mask)
+       // match: (VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask)
+       // result: (VPMOVSWBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVSWB128 {
+               if v_0.Op != OpAMD64VPMOVSWB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVSWBMasked128)
+               v.reset(OpAMD64VPMOVSWBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
@@ -31291,15 +31291,15 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU16Masked128 (VPMOVUSWB128 x) mask)
-       // result: (VPMOVUSWBMasked128 x mask)
+       // match: (VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask)
+       // result: (VPMOVUSWBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVUSWB128 {
+               if v_0.Op != OpAMD64VPMOVUSWB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVUSWBMasked128)
+               v.reset(OpAMD64VPMOVUSWBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
@@ -31781,6 +31781,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU16Masked256 (VPMOVWB128_256 x) mask)
+       // result: (VPMOVWBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVWB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVWBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU16Masked256 (VPMOVWB256 x) mask)
        // result: (VPMOVWBMasked256 x mask)
        for {
@@ -31793,6 +31805,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask)
+       // result: (VPMOVSWBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSWB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSWBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU16Masked256 (VPMOVSWB256 x) mask)
        // result: (VPMOVSWBMasked256 x mask)
        for {
@@ -31817,6 +31841,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask)
+       // result: (VPMOVUSWBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSWB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSWBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask)
        // result: (VPMOVUSWBMasked256 x mask)
        for {
@@ -32863,51 +32899,51 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU32Masked128 (VPMOVDB128 x) mask)
-       // result: (VPMOVDBMasked128 x mask)
+       // match: (VMOVDQU32Masked128 (VPMOVDB128_128 x) mask)
+       // result: (VPMOVDBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVDB128 {
+               if v_0.Op != OpAMD64VPMOVDB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVDBMasked128)
+               v.reset(OpAMD64VPMOVDBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU32Masked128 (VPMOVSDB128 x) mask)
-       // result: (VPMOVSDBMasked128 x mask)
+       // match: (VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask)
+       // result: (VPMOVSDBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVSDB128 {
+               if v_0.Op != OpAMD64VPMOVSDB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVSDBMasked128)
+               v.reset(OpAMD64VPMOVSDBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU32Masked128 (VPMOVDW128 x) mask)
-       // result: (VPMOVDWMasked128 x mask)
+       // match: (VMOVDQU32Masked128 (VPMOVDW128_128 x) mask)
+       // result: (VPMOVDWMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVDW128 {
+               if v_0.Op != OpAMD64VPMOVDW128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVDWMasked128)
+               v.reset(OpAMD64VPMOVDWMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU32Masked128 (VPMOVSDW128 x) mask)
-       // result: (VPMOVSDWMasked128 x mask)
+       // match: (VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask)
+       // result: (VPMOVSDWMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVSDW128 {
+               if v_0.Op != OpAMD64VPMOVSDW128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVSDWMasked128)
+               v.reset(OpAMD64VPMOVSDWMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
@@ -32948,27 +32984,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU32Masked128 (VPMOVUSDB128 x) mask)
-       // result: (VPMOVUSDBMasked128 x mask)
+       // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask)
+       // result: (VPMOVUSDBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVUSDB128 {
+               if v_0.Op != OpAMD64VPMOVUSDB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVUSDBMasked128)
+               v.reset(OpAMD64VPMOVUSDBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU32Masked128 (VPMOVUSDW128 x) mask)
-       // result: (VPMOVUSDWMasked128 x mask)
+       // match: (VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask)
+       // result: (VPMOVUSDWMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVUSDW128 {
+               if v_0.Op != OpAMD64VPMOVUSDW128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVUSDWMasked128)
+               v.reset(OpAMD64VPMOVUSDWMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
@@ -33626,6 +33662,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU32Masked256 (VPMOVDB128_256 x) mask)
+       // result: (VPMOVDBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVDB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVDBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask)
+       // result: (VPMOVSDBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSDB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSDBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU32Masked256 (VPMOVDW128_256 x) mask)
+       // result: (VPMOVDWMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVDW128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVDWMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU32Masked256 (VPMOVDW256 x) mask)
        // result: (VPMOVDWMasked256 x mask)
        for {
@@ -33638,6 +33710,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask)
+       // result: (VPMOVSDWMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSDW128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSDWMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU32Masked256 (VPMOVSDW256 x) mask)
        // result: (VPMOVSDWMasked256 x mask)
        for {
@@ -33687,6 +33771,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask)
+       // result: (VPMOVUSDBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSDB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSDBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask)
+       // result: (VPMOVUSDWMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSDW128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSDWMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask)
        // result: (VPMOVUSDWMasked256 x mask)
        for {
@@ -34405,6 +34513,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU32Masked512 (VPMOVDB128_512 x) mask)
+       // result: (VPMOVDBMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVDB128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVDBMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask)
+       // result: (VPMOVSDBMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSDB128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSDBMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask)
        // result: (VPACKSSDWMasked512 x y mask)
        for {
@@ -34442,6 +34574,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask)
+       // result: (VPMOVUSDBMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSDB128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSDBMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask)
        // result: (VPACKUSDWMasked512 x y mask)
        for {
@@ -35132,111 +35276,111 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVQB128 x) mask)
-       // result: (VPMOVQBMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVQB128_128 x) mask)
+       // result: (VPMOVQBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVQB128 {
+               if v_0.Op != OpAMD64VPMOVQB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVQBMasked128)
+               v.reset(OpAMD64VPMOVQBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVSQB128 x) mask)
-       // result: (VPMOVSQBMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask)
+       // result: (VPMOVSQBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVSQB128 {
+               if v_0.Op != OpAMD64VPMOVSQB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVSQBMasked128)
+               v.reset(OpAMD64VPMOVSQBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVQW128 x) mask)
-       // result: (VPMOVQWMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVQW128_128 x) mask)
+       // result: (VPMOVQWMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVQW128 {
+               if v_0.Op != OpAMD64VPMOVQW128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVQWMasked128)
+               v.reset(OpAMD64VPMOVQWMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVSQW128 x) mask)
-       // result: (VPMOVSQWMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask)
+       // result: (VPMOVSQWMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVSQW128 {
+               if v_0.Op != OpAMD64VPMOVSQW128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVSQWMasked128)
+               v.reset(OpAMD64VPMOVSQWMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVQD128 x) mask)
-       // result: (VPMOVQDMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVQD128_128 x) mask)
+       // result: (VPMOVQDMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVQD128 {
+               if v_0.Op != OpAMD64VPMOVQD128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVQDMasked128)
+               v.reset(OpAMD64VPMOVQDMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVSQD128 x) mask)
-       // result: (VPMOVSQDMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask)
+       // result: (VPMOVSQDMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVSQD128 {
+               if v_0.Op != OpAMD64VPMOVSQD128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVSQDMasked128)
+               v.reset(OpAMD64VPMOVSQDMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVUSQB128 x) mask)
-       // result: (VPMOVUSQBMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask)
+       // result: (VPMOVUSQBMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVUSQB128 {
+               if v_0.Op != OpAMD64VPMOVUSQB128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVUSQBMasked128)
+               v.reset(OpAMD64VPMOVUSQBMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVUSQW128 x) mask)
-       // result: (VPMOVUSQWMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask)
+       // result: (VPMOVUSQWMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVUSQW128 {
+               if v_0.Op != OpAMD64VPMOVUSQW128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVUSQWMasked128)
+               v.reset(OpAMD64VPMOVUSQWMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
-       // match: (VMOVDQU64Masked128 (VPMOVUSQD128 x) mask)
-       // result: (VPMOVUSQDMasked128 x mask)
+       // match: (VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask)
+       // result: (VPMOVUSQDMasked128_128 x mask)
        for {
-               if v_0.Op != OpAMD64VPMOVUSQD128 {
+               if v_0.Op != OpAMD64VPMOVUSQD128_128 {
                        break
                }
                x := v_0.Args[0]
                mask := v_1
-               v.reset(OpAMD64VPMOVUSQDMasked128)
+               v.reset(OpAMD64VPMOVUSQDMasked128_128)
                v.AddArg2(x, mask)
                return true
        }
@@ -35839,6 +35983,66 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU64Masked256 (VPMOVQB128_256 x) mask)
+       // result: (VPMOVQBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVQB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVQBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask)
+       // result: (VPMOVSQBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSQB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSQBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked256 (VPMOVQW128_256 x) mask)
+       // result: (VPMOVQWMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVQW128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVQWMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask)
+       // result: (VPMOVSQWMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSQW128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSQWMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked256 (VPMOVQD128_256 x) mask)
+       // result: (VPMOVQDMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVQD128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVQDMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU64Masked256 (VPMOVQD256 x) mask)
        // result: (VPMOVQDMasked256 x mask)
        for {
@@ -35851,6 +36055,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask)
+       // result: (VPMOVSQDMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSQD128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSQDMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU64Masked256 (VPMOVSQD256 x) mask)
        // result: (VPMOVSQDMasked256 x mask)
        for {
@@ -35863,6 +36079,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask)
+       // result: (VPMOVUSQBMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSQB128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSQBMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask)
+       // result: (VPMOVUSQWMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSQW128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSQWMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask)
+       // result: (VPMOVUSQDMasked128_256 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSQD128_256 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSQDMasked128_256)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask)
        // result: (VPMOVUSQDMasked256 x mask)
        for {
@@ -36526,6 +36778,78 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool {
                v.AddArg2(x, mask)
                return true
        }
+       // match: (VMOVDQU64Masked512 (VPMOVQB128_512 x) mask)
+       // result: (VPMOVQBMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVQB128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVQBMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask)
+       // result: (VPMOVSQBMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSQB128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSQBMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked512 (VPMOVQW128_512 x) mask)
+       // result: (VPMOVQWMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVQW128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVQWMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask)
+       // result: (VPMOVSQWMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVSQW128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVSQWMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask)
+       // result: (VPMOVUSQBMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSQB128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSQBMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
+       // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask)
+       // result: (VPMOVUSQWMasked128_512 x mask)
+       for {
+               if v_0.Op != OpAMD64VPMOVUSQW128_512 {
+                       break
+               }
+               x := v_0.Args[0]
+               mask := v_1
+               v.reset(OpAMD64VPMOVUSQWMasked128_512)
+               v.AddArg2(x, mask)
+               return true
+       }
        // match: (VMOVDQU64Masked512 (VDIVPD512 x y) mask)
        // result: (VDIVPDMasked512 x y mask)
        for {
@@ -40279,61 +40603,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask)
-       // result: (VPOPCNTBMasked512Merging dst x mask)
+       // match: (VPBLENDMBMasked512 dst (VPABSB512 x) mask)
+       // result: (VPABSBMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTB512 {
+               if v_1.Op != OpAMD64VPABSB512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPOPCNTBMasked512Merging)
+               v.reset(OpAMD64VPABSBMasked512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask)
-       // result: (VPSUBSBMasked512Merging dst x y mask)
-       for {
-               dst := v_0
-               if v_1.Op != OpAMD64VPSUBSB512 {
-                       break
-               }
-               y := v_1.Args[1]
-               x := v_1.Args[0]
-               mask := v_2
-               v.reset(OpAMD64VPSUBSBMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
-               return true
-       }
-       // match: (VPBLENDMBMasked512 dst (VPSUBB512 x y) mask)
-       // result: (VPSUBBMasked512Merging dst x y mask)
-       for {
-               dst := v_0
-               if v_1.Op != OpAMD64VPSUBB512 {
-                       break
-               }
-               y := v_1.Args[1]
-               x := v_1.Args[0]
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
-               return true
-       }
-       // match: (VPBLENDMBMasked512 dst (VPMINSB512 x y) mask)
-       // result: (VPMINSBMasked512Merging dst x y mask)
-       for {
-               dst := v_0
-               if v_1.Op != OpAMD64VPMINSB512 {
-                       break
-               }
-               y := v_1.Args[1]
-               x := v_1.Args[0]
-               mask := v_2
-               v.reset(OpAMD64VPMINSBMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
-               return true
-       }
        // match: (VPBLENDMBMasked512 dst (VPADDB512 x y) mask)
        // result: (VPADDBMasked512Merging dst x y mask)
        for {
@@ -40348,17 +40630,17 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask)
-       // result: (VPMAXUBMasked512Merging dst x y mask)
+       // match: (VPBLENDMBMasked512 dst (VPADDSB512 x y) mask)
+       // result: (VPADDSBMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUB512 {
+               if v_1.Op != OpAMD64VPADDSB512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMAXUBMasked512Merging)
+               v.reset(OpAMD64VPADDSBMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
@@ -40390,48 +40672,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMBMasked512 dst (VPMINUB512 x y) mask)
-       // result: (VPMINUBMasked512Merging dst x y mask)
+       // match: (VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask)
+       // result: (VPMAXSBMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUB512 {
+               if v_1.Op != OpAMD64VPMAXSB512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMINUBMasked512Merging)
+               v.reset(OpAMD64VPMAXSBMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask)
-       // result: (VPMAXSBMasked512Merging dst x y mask)
+       // match: (VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask)
+       // result: (VPMAXUBMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSB512 {
+               if v_1.Op != OpAMD64VPMAXUB512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMAXSBMasked512Merging)
+               v.reset(OpAMD64VPMAXUBMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask)
-       // result: (VPSUBUSBMasked512Merging dst x y mask)
+       // match: (VPBLENDMBMasked512 dst (VPMINSB512 x y) mask)
+       // result: (VPMINSBMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBUSB512 {
+               if v_1.Op != OpAMD64VPMINSB512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSUBUSBMasked512Merging)
+               v.reset(OpAMD64VPMINSBMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
+               return true
+       }
+       // match: (VPBLENDMBMasked512 dst (VPMINUB512 x y) mask)
+       // result: (VPMINUBMasked512Merging dst x y mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPMINUB512 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPMINUBMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
+       // match: (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask)
+       // result: (VPOPCNTBMasked512Merging dst x mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPOPCNTB512 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPOPCNTBMasked512Merging)
+               v.AddArg3(dst, x, mask)
+               return true
+       }
        // match: (VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask)
        // result: (VPSHUFBMasked512Merging dst x y mask)
        for {
@@ -40446,30 +40755,45 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMBMasked512 dst (VPABSB512 x) mask)
-       // result: (VPABSBMasked512Merging dst x mask)
+       // match: (VPBLENDMBMasked512 dst (VPSUBB512 x y) mask)
+       // result: (VPSUBBMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSB512 {
+               if v_1.Op != OpAMD64VPSUBB512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPABSBMasked512Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPSUBBMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMBMasked512 dst (VPADDSB512 x y) mask)
-       // result: (VPADDSBMasked512Merging dst x y mask)
+       // match: (VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask)
+       // result: (VPSUBSBMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDSB512 {
+               if v_1.Op != OpAMD64VPSUBSB512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPADDSBMasked512Merging)
+               v.reset(OpAMD64VPSUBSBMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
+               return true
+       }
+       // match: (VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask)
+       // result: (VPSUBUSBMasked512Merging dst x y mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPSUBUSB512 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPSUBUSBMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
@@ -40479,547 +40803,547 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask)
-       // result: (VPMOVSDWMasked256Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VADDPS512 x y) mask)
+       // result: (VADDPSMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSDW256 {
+               if v_1.Op != OpAMD64VADDPS512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVSDWMasked256Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VADDPSMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask)
-       // result: (VPLZCNTDMasked512Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask)
+       // result: (VCVTPS2UDQMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPLZCNTD512 {
+               if v_1.Op != OpAMD64VCVTPS2UDQ512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPLZCNTDMasked512Merging)
+               v.reset(OpAMD64VCVTPS2UDQMasked512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask)
-       // result: (VPMULLDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask)
+       // result: (VCVTTPS2DQMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULLD512 {
+               if v_1.Op != OpAMD64VCVTTPS2DQ512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMULLDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VCVTTPS2DQMasked512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VMAXPS512 x y) mask)
-       // result: (VMAXPSMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VDIVPS512 x y) mask)
+       // result: (VDIVPSMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMAXPS512 {
+               if v_1.Op != OpAMD64VDIVPS512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VMAXPSMasked512Merging)
+               v.reset(OpAMD64VDIVPSMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128 x) mask)
-       // result: (VPMOVUSDBMasked128Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VMAXPS512 x y) mask)
+       // result: (VMAXPSMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSDB128 {
+               if v_1.Op != OpAMD64VMAXPS512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVUSDBMasked128Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VMAXPSMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask)
-       // result: (VRSQRT14PSMasked512Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VMINPS512 x y) mask)
+       // result: (VMINPSMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRSQRT14PS512 {
+               if v_1.Op != OpAMD64VMINPS512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VRSQRT14PSMasked512Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VMINPSMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask)
-       // result: (VPMOVDWMasked256Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VMULPS512 x y) mask)
+       // result: (VMULPSMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVDW256 {
+               if v_1.Op != OpAMD64VMULPS512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVDWMasked256Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VMULPSMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask)
-       // result: (VRCP14PSMasked512Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VPABSD512 x) mask)
+       // result: (VPABSDMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRCP14PS512 {
+               if v_1.Op != OpAMD64VPABSD512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VRCP14PSMasked512Merging)
+               v.reset(OpAMD64VPABSDMasked512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask)
-       // result: (VREDUCEPSMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask)
+       // result: (VPACKSSDWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VREDUCEPS512 {
+               if v_1.Op != OpAMD64VPACKSSDW512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VREDUCEPSMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPACKSSDWMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VDIVPS512 x y) mask)
-       // result: (VDIVPSMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask)
+       // result: (VPACKUSDWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VDIVPS512 {
+               if v_1.Op != OpAMD64VPACKUSDW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VDIVPSMasked512Merging)
+               v.reset(OpAMD64VPACKUSDWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask)
-       // result: (VPSRLVDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPADDD512 x y) mask)
+       // result: (VPADDDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVD512 {
+               if v_1.Op != OpAMD64VPADDD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSRLVDMasked512Merging)
+               v.reset(OpAMD64VPADDDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask)
-       // result: (VPSUBDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPANDD512 x y) mask)
+       // result: (VPANDDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBD512 {
+               if v_1.Op != OpAMD64VPANDD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSUBDMasked512Merging)
+               v.reset(OpAMD64VPANDDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask)
-       // result: (VPROLDMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask)
+       // result: (VPLZCNTDMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLD512 {
+               if v_1.Op != OpAMD64VPLZCNTD512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPROLDMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPLZCNTDMasked512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPORD512 x y) mask)
-       // result: (VPORDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask)
+       // result: (VPMAXSDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPORD512 {
+               if v_1.Op != OpAMD64VPMAXSD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPORDMasked512Merging)
+               v.reset(OpAMD64VPMAXSDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask)
-       // result: (VPSHLDDMasked512Merging dst [a] x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask)
+       // result: (VPMAXUDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDD512 {
+               if v_1.Op != OpAMD64VPMAXUD512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMAXUDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask)
-       // result: (VPACKUSDWMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPMINSD512 x y) mask)
+       // result: (VPMINSDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPACKUSDW512 {
+               if v_1.Op != OpAMD64VPMINSD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPACKUSDWMasked512Merging)
+               v.reset(OpAMD64VPMINSDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask)
-       // result: (VPMAXSDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPMINUD512 x y) mask)
+       // result: (VPMINUDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSD512 {
+               if v_1.Op != OpAMD64VPMINUD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMAXSDMasked512Merging)
+               v.reset(OpAMD64VPMINUDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VADDPS512 x y) mask)
-       // result: (VADDPSMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPMOVDB128_512 x) mask)
+       // result: (VPMOVDBMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VADDPS512 {
+               if v_1.Op != OpAMD64VPMOVDB128_512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VADDPSMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVDBMasked128_512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask)
-       // result: (VPMOVUSDWMasked256Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask)
+       // result: (VPMOVDWMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSDW256 {
+               if v_1.Op != OpAMD64VPMOVDW256 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVUSDWMasked256Merging)
+               v.reset(OpAMD64VPMOVDWMasked256Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMOVSDB128 x) mask)
-       // result: (VPMOVSDBMasked128Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask)
+       // result: (VPMOVSDBMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSDB128 {
+               if v_1.Op != OpAMD64VPMOVSDB128_512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVSDBMasked128Merging)
+               v.reset(OpAMD64VPMOVSDBMasked128_512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VSUBPS512 x y) mask)
-       // result: (VSUBPSMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask)
+       // result: (VPMOVSDWMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSUBPS512 {
+               if v_1.Op != OpAMD64VPMOVSDW256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VSUBPSMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVSDWMasked256Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask)
-       // result: (VPMAXUDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask)
+       // result: (VPMOVUSDBMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUD512 {
+               if v_1.Op != OpAMD64VPMOVUSDB128_512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMAXUDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVUSDBMasked128_512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask)
-       // result: (VPRORDMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask)
+       // result: (VPMOVUSDWMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORD512 {
+               if v_1.Op != OpAMD64VPMOVUSDW256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPRORDMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMOVUSDWMasked256Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPROLVD512 x y) mask)
-       // result: (VPROLVDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask)
+       // result: (VPMULLDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLVD512 {
+               if v_1.Op != OpAMD64VPMULLD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPROLVDMasked512Merging)
+               v.reset(OpAMD64VPMULLDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask)
-       // result: (VCVTTPS2DQMasked512Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask)
+       // result: (VPOPCNTDMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VCVTTPS2DQ512 {
+               if v_1.Op != OpAMD64VPOPCNTD512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VCVTTPS2DQMasked512Merging)
+               v.reset(OpAMD64VPOPCNTDMasked512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask)
-       // result: (VPACKSSDWMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPORD512 x y) mask)
+       // result: (VPORDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPACKSSDW512 {
+               if v_1.Op != OpAMD64VPORD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPACKSSDWMasked512Merging)
+               v.reset(OpAMD64VPORDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPRORVD512 x y) mask)
-       // result: (VPRORVDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask)
+       // result: (VPROLDMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORVD512 {
+               if v_1.Op != OpAMD64VPROLD512 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPRORVDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPROLDMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPADDD512 x y) mask)
-       // result: (VPADDDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPROLVD512 x y) mask)
+       // result: (VPROLVDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDD512 {
+               if v_1.Op != OpAMD64VPROLVD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPADDDMasked512Merging)
+               v.reset(OpAMD64VPROLVDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask)
-       // result: (VRNDSCALEPSMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask)
+       // result: (VPRORDMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRNDSCALEPS512 {
+               if v_1.Op != OpAMD64VPRORD512 {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VRNDSCALEPSMasked512Merging)
+               v.reset(OpAMD64VPRORDMasked512Merging)
                v.AuxInt = uint8ToAuxInt(a)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask)
-       // result: (VCVTPS2UDQMasked512Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VPRORVD512 x y) mask)
+       // result: (VPRORVDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VCVTPS2UDQ512 {
+               if v_1.Op != OpAMD64VPRORVD512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VCVTPS2UDQMasked512Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPRORVDMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask)
-       // result: (VPSHRDDMasked512Merging dst [a] x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask)
+       // result: (VPSHLDDMasked512Merging dst [a] x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHRDD512 {
+               if v_1.Op != OpAMD64VPSHLDD512 {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked512Merging)
+               v.reset(OpAMD64VPSHLDDMasked512Merging)
                v.AuxInt = uint8ToAuxInt(a)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask)
-       // result: (VPOPCNTDMasked512Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask)
+       // result: (VPSHRDDMasked512Merging dst [a] x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTD512 {
+               if v_1.Op != OpAMD64VPSHRDD512 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPOPCNTDMasked512Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPSHRDDMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMOVDB128 x) mask)
-       // result: (VPMOVDBMasked128Merging dst x mask)
+       // match: (VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask)
+       // result: (VPSHUFDMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVDB128 {
+               if v_1.Op != OpAMD64VPSHUFD512 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVDBMasked128Merging)
+               v.reset(OpAMD64VPSHUFDMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask)
-       // result: (VPSRADMasked512constMerging dst [a] x mask)
+       // match: (VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask)
+       // result: (VPSLLDMasked512constMerging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAD512const {
+               if v_1.Op != OpAMD64VPSLLD512const {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSRADMasked512constMerging)
+               v.reset(OpAMD64VPSLLDMasked512constMerging)
                v.AuxInt = uint8ToAuxInt(a)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VMINPS512 x y) mask)
-       // result: (VMINPSMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask)
+       // result: (VPSLLVDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMINPS512 {
+               if v_1.Op != OpAMD64VPSLLVD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VMINPSMasked512Merging)
+               v.reset(OpAMD64VPSLLVDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPANDD512 x y) mask)
-       // result: (VPANDDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask)
+       // result: (VPSRADMasked512constMerging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPANDD512 {
+               if v_1.Op != OpAMD64VPSRAD512const {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPANDDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPSRADMasked512constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask)
-       // result: (VPSHUFDMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask)
+       // result: (VPSRAVDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHUFD512 {
+               if v_1.Op != OpAMD64VPSRAVD512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSHUFDMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPSRAVDMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMINSD512 x y) mask)
-       // result: (VPMINSDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask)
+       // result: (VPSRLVDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSD512 {
+               if v_1.Op != OpAMD64VPSRLVD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMINSDMasked512Merging)
+               v.reset(OpAMD64VPSRLVDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask)
-       // result: (VPSRAVDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask)
+       // result: (VPSUBDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVD512 {
+               if v_1.Op != OpAMD64VPSUBD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSRAVDMasked512Merging)
+               v.reset(OpAMD64VPSUBDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
@@ -41037,47 +41361,60 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask)
-       // result: (VPSLLVDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask)
+       // result: (VRCP14PSMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLVD512 {
+               if v_1.Op != OpAMD64VRCP14PS512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VRCP14PSMasked512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask)
-       // result: (VPSLLDMasked512constMerging dst [a] x mask)
+       // match: (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask)
+       // result: (VREDUCEPSMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLD512const {
+               if v_1.Op != OpAMD64VREDUCEPS512 {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512constMerging)
+               v.reset(OpAMD64VREDUCEPSMasked512Merging)
                v.AuxInt = uint8ToAuxInt(a)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPMINUD512 x y) mask)
-       // result: (VPMINUDMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask)
+       // result: (VRNDSCALEPSMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUD512 {
+               if v_1.Op != OpAMD64VRNDSCALEPS512 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMINUDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VRNDSCALEPSMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg3(dst, x, mask)
+               return true
+       }
+       // match: (VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask)
+       // result: (VRSQRT14PSMasked512Merging dst x mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VRSQRT14PS512 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VRSQRT14PSMasked512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
        // match: (VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask)
@@ -41107,30 +41444,17 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool {
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMDMasked512 dst (VPABSD512 x) mask)
-       // result: (VPABSDMasked512Merging dst x mask)
-       for {
-               dst := v_0
-               if v_1.Op != OpAMD64VPABSD512 {
-                       break
-               }
-               x := v_1.Args[0]
-               mask := v_2
-               v.reset(OpAMD64VPABSDMasked512Merging)
-               v.AddArg3(dst, x, mask)
-               return true
-       }
-       // match: (VPBLENDMDMasked512 dst (VMULPS512 x y) mask)
-       // result: (VMULPSMasked512Merging dst x y mask)
+       // match: (VPBLENDMDMasked512 dst (VSUBPS512 x y) mask)
+       // result: (VSUBPSMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMULPS512 {
+               if v_1.Op != OpAMD64VSUBPS512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VMULPSMasked512Merging)
+               v.reset(OpAMD64VSUBPSMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
@@ -41163,104 +41487,74 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask)
-       // result: (VPSLLQMasked512constMerging dst [a] x mask)
+       // match: (VPBLENDMQMasked512 dst (VADDPD512 x y) mask)
+       // result: (VADDPDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLQ512const {
+               if v_1.Op != OpAMD64VADDPD512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VADDPDMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask)
-       // result: (VPSUBQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VDIVPD512 x y) mask)
+       // result: (VDIVPDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBQ512 {
+               if v_1.Op != OpAMD64VDIVPD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSUBQMasked512Merging)
+               v.reset(OpAMD64VDIVPDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask)
-       // result: (VPROLQMasked512Merging dst [a] x mask)
-       for {
-               dst := v_0
-               if v_1.Op != OpAMD64VPROLQ512 {
-                       break
-               }
-               a := auxIntToUint8(v_1.AuxInt)
-               x := v_1.Args[0]
-               mask := v_2
-               v.reset(OpAMD64VPROLQMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v.AddArg3(dst, x, mask)
-               return true
-       }
-       // match: (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask)
-       // result: (VPSLLVQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VMAXPD512 x y) mask)
+       // result: (VMAXPDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLVQ512 {
+               if v_1.Op != OpAMD64VMAXPD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked512Merging)
+               v.reset(OpAMD64VMAXPDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128 x) mask)
-       // result: (VPMOVUSQBMasked128Merging dst x mask)
-       for {
-               dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSQB128 {
-                       break
-               }
-               x := v_1.Args[0]
-               mask := v_2
-               v.reset(OpAMD64VPMOVUSQBMasked128Merging)
-               v.AddArg3(dst, x, mask)
-               return true
-       }
-       // match: (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask)
-       // result: (VPADDQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VMINPD512 x y) mask)
+       // result: (VMINPDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDQ512 {
+               if v_1.Op != OpAMD64VMINPD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPADDQMasked512Merging)
+               v.reset(OpAMD64VMINPDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask)
-       // result: (VRNDSCALEPDMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMQMasked512 dst (VMULPD512 x y) mask)
+       // result: (VMULPDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRNDSCALEPD512 {
+               if v_1.Op != OpAMD64VMULPD512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VRNDSCALEPDMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VMULPDMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
        // match: (VPBLENDMQMasked512 dst (VPABSQ512 x) mask)
@@ -41276,226 +41570,218 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool {
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask)
-       // result: (VPMOVUSQDMasked256Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask)
+       // result: (VPADDQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSQD256 {
+               if v_1.Op != OpAMD64VPADDQ512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVUSQDMasked256Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPADDQMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VADDPD512 x y) mask)
-       // result: (VADDPDMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPANDQ512 x y) mask)
+       // result: (VPANDQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VADDPD512 {
+               if v_1.Op != OpAMD64VPANDQ512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VADDPDMasked512Merging)
+               v.reset(OpAMD64VPANDQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VRCP14PD512 x) mask)
-       // result: (VRCP14PDMasked512Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask)
+       // result: (VPLZCNTQMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRCP14PD512 {
+               if v_1.Op != OpAMD64VPLZCNTQ512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VRCP14PDMasked512Merging)
+               v.reset(OpAMD64VPLZCNTQMasked512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask)
-       // result: (VPSRLVQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask)
+       // result: (VPMAXSQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVQ512 {
+               if v_1.Op != OpAMD64VPMAXSQ512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSRLVQMasked512Merging)
+               v.reset(OpAMD64VPMAXSQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask)
-       // result: (VPRORVQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask)
+       // result: (VPMAXUQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORVQ512 {
+               if v_1.Op != OpAMD64VPMAXUQ512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPRORVQMasked512Merging)
+               v.reset(OpAMD64VPMAXUQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask)
-       // result: (VPSRAVQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask)
+       // result: (VPMINSQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVQ512 {
+               if v_1.Op != OpAMD64VPMINSQ512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSRAVQMasked512Merging)
+               v.reset(OpAMD64VPMINSQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPANDQ512 x y) mask)
-       // result: (VPANDQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask)
+       // result: (VPMINUQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPANDQ512 {
+               if v_1.Op != OpAMD64VPMINUQ512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPANDQMasked512Merging)
+               v.reset(OpAMD64VPMINUQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVQB128 x) mask)
-       // result: (VPMOVQBMasked128Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVQB128_512 x) mask)
+       // result: (VPMOVQBMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVQB128 {
+               if v_1.Op != OpAMD64VPMOVQB128_512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVQBMasked128Merging)
+               v.reset(OpAMD64VPMOVQBMasked128_512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask)
-       // result: (VPSHLDQMasked512Merging dst [a] x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVQD256 x) mask)
+       // result: (VPMOVQDMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDQ512 {
+               if v_1.Op != OpAMD64VPMOVQD256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVQDMasked256Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VDIVPD512 x y) mask)
-       // result: (VDIVPDMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVQW128_512 x) mask)
+       // result: (VPMOVQWMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VDIVPD512 {
+               if v_1.Op != OpAMD64VPMOVQW128_512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VDIVPDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVQWMasked128_512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask)
-       // result: (VPROLVQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask)
+       // result: (VPMOVSQBMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLVQ512 {
+               if v_1.Op != OpAMD64VPMOVSQB128_512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPROLVQMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVSQBMasked128_512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask)
-       // result: (VPRORQMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask)
+       // result: (VPMOVSQDMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORQ512 {
+               if v_1.Op != OpAMD64VPMOVSQD256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPRORQMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMOVSQDMasked256Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask)
-       // result: (VPMINSQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask)
+       // result: (VPMOVSQWMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSQ512 {
+               if v_1.Op != OpAMD64VPMOVSQW128_512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMINSQMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVSQWMasked128_512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VSQRTPD512 x) mask)
-       // result: (VSQRTPDMasked512Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask)
+       // result: (VPMOVUSQBMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSQRTPD512 {
+               if v_1.Op != OpAMD64VPMOVUSQB128_512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VSQRTPDMasked512Merging)
+               v.reset(OpAMD64VPMOVUSQBMasked128_512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask)
-       // result: (VPMOVSQDMasked256Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask)
+       // result: (VPMOVUSQDMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSQD256 {
+               if v_1.Op != OpAMD64VPMOVUSQD256 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVSQDMasked256Merging)
+               v.reset(OpAMD64VPMOVUSQDMasked256Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VMINPD512 x y) mask)
-       // result: (VMINPDMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask)
+       // result: (VPMOVUSQWMasked128_512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMINPD512 {
+               if v_1.Op != OpAMD64VPMOVUSQW128_512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VMINPDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVUSQWMasked128_512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
        // match: (VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask)
@@ -41512,237 +41798,263 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VMAXPD512 x y) mask)
-       // result: (VMAXPDMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask)
+       // result: (VPOPCNTQMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMAXPD512 {
+               if v_1.Op != OpAMD64VPOPCNTQ512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VMAXPDMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPOPCNTQMasked512Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VMULPD512 x y) mask)
-       // result: (VMULPDMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPORQ512 x y) mask)
+       // result: (VPORQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMULPD512 {
+               if v_1.Op != OpAMD64VPORQ512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VMULPDMasked512Merging)
+               v.reset(OpAMD64VPORQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPORQ512 x y) mask)
-       // result: (VPORQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask)
+       // result: (VPROLQMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPORQ512 {
+               if v_1.Op != OpAMD64VPROLQ512 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPORQMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPROLQMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVUSQW128 x) mask)
-       // result: (VPMOVUSQWMasked128Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask)
+       // result: (VPROLVQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSQW128 {
+               if v_1.Op != OpAMD64VPROLVQ512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVUSQWMasked128Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPROLVQMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask)
-       // result: (VREDUCEPDMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask)
+       // result: (VPRORQMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VREDUCEPD512 {
+               if v_1.Op != OpAMD64VPRORQ512 {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VREDUCEPDMasked512Merging)
+               v.reset(OpAMD64VPRORQMasked512Merging)
                v.AuxInt = uint8ToAuxInt(a)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask)
-       // result: (VPOPCNTQMasked512Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask)
+       // result: (VPRORVQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTQ512 {
+               if v_1.Op != OpAMD64VPRORVQ512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPOPCNTQMasked512Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPRORVQMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask)
-       // result: (VPXORQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask)
+       // result: (VPSHLDQMasked512Merging dst [a] x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPXORQ512 {
+               if v_1.Op != OpAMD64VPSHLDQ512 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPXORQMasked512Merging)
+               v.reset(OpAMD64VPSHLDQMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVQD256 x) mask)
-       // result: (VPMOVQDMasked256Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask)
+       // result: (VPSHRDQMasked512Merging dst [a] x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVQD256 {
+               if v_1.Op != OpAMD64VPSHRDQ512 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVQDMasked256Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPSHRDQMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask)
-       // result: (VPMAXUQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask)
+       // result: (VPSLLQMasked512constMerging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUQ512 {
+               if v_1.Op != OpAMD64VPSLLQ512const {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMAXUQMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPSLLQMasked512constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VSUBPD512 x y) mask)
-       // result: (VSUBPDMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask)
+       // result: (VPSLLVQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSUBPD512 {
+               if v_1.Op != OpAMD64VPSLLVQ512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VSUBPDMasked512Merging)
+               v.reset(OpAMD64VPSLLVQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVQW128 x) mask)
-       // result: (VPMOVQWMasked128Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask)
+       // result: (VPSRAQMasked512constMerging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVQW128 {
+               if v_1.Op != OpAMD64VPSRAQ512const {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVQWMasked128Merging)
+               v.reset(OpAMD64VPSRAQMasked512constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask)
-       // result: (VPSHRDQMasked512Merging dst [a] x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask)
+       // result: (VPSRAVQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHRDQ512 {
+               if v_1.Op != OpAMD64VPSRAVQ512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSRAVQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask)
-       // result: (VPLZCNTQMasked512Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask)
+       // result: (VPSRLVQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPLZCNTQ512 {
+               if v_1.Op != OpAMD64VPSRLVQ512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPLZCNTQMasked512Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPSRLVQMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask)
-       // result: (VSCALEFPDMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask)
+       // result: (VPSUBQMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSCALEFPD512 {
+               if v_1.Op != OpAMD64VPSUBQ512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VSCALEFPDMasked512Merging)
+               v.reset(OpAMD64VPSUBQMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
+               return true
+       }
+       // match: (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask)
+       // result: (VPXORQMasked512Merging dst x y mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPXORQ512 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPXORQMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVSQW128 x) mask)
-       // result: (VPMOVSQWMasked128Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VRCP14PD512 x) mask)
+       // result: (VRCP14PDMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSQW128 {
+               if v_1.Op != OpAMD64VRCP14PD512 {
                        break
                }
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVSQWMasked128Merging)
+               v.reset(OpAMD64VRCP14PDMasked512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask)
-       // result: (VPMINUQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask)
+       // result: (VREDUCEPDMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUQ512 {
+               if v_1.Op != OpAMD64VREDUCEPD512 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMINUQMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VREDUCEPDMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMOVSQB128 x) mask)
-       // result: (VPMOVSQBMasked128Merging dst x mask)
+       // match: (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask)
+       // result: (VRNDSCALEPDMasked512Merging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSQB128 {
+               if v_1.Op != OpAMD64VRNDSCALEPD512 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVSQBMasked128Merging)
+               v.reset(OpAMD64VRNDSCALEPDMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v.AddArg3(dst, x, mask)
                return true
        }
@@ -41759,32 +42071,44 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool {
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask)
-       // result: (VPSRAQMasked512constMerging dst [a] x mask)
+       // match: (VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask)
+       // result: (VSCALEFPDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAQ512const {
+               if v_1.Op != OpAMD64VSCALEFPD512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSRAQMasked512constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VSCALEFPDMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
+               return true
+       }
+       // match: (VPBLENDMQMasked512 dst (VSQRTPD512 x) mask)
+       // result: (VSQRTPDMasked512Merging dst x mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VSQRTPD512 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VSQRTPDMasked512Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask)
-       // result: (VPMAXSQMasked512Merging dst x y mask)
+       // match: (VPBLENDMQMasked512 dst (VSUBPD512 x y) mask)
+       // result: (VSUBPDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSQ512 {
+               if v_1.Op != OpAMD64VSUBPD512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMAXSQMasked512Merging)
+               v.reset(OpAMD64VSUBPDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
@@ -41817,45 +42141,73 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask)
-       // result: (VPMAXSWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPABSW512 x) mask)
+       // result: (VPABSWMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSW512 {
+               if v_1.Op != OpAMD64VPABSW512 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPABSWMasked512Merging)
+               v.AddArg3(dst, x, mask)
+               return true
+       }
+       // match: (VPBLENDMWMasked512 dst (VPADDSW512 x y) mask)
+       // result: (VPADDSWMasked512Merging dst x y mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPADDSW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMAXSWMasked512Merging)
+               v.reset(OpAMD64VPADDSWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMULHW512 x y) mask)
-       // result: (VPMULHWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask)
+       // result: (VPADDUSWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULHW512 {
+               if v_1.Op != OpAMD64VPADDUSW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMULHWMasked512Merging)
+               v.reset(OpAMD64VPADDUSWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMOVWB256 x) mask)
-       // result: (VPMOVWBMasked256Merging dst x mask)
+       // match: (VPBLENDMWMasked512 dst (VPADDW512 x y) mask)
+       // result: (VPADDWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVWB256 {
+               if v_1.Op != OpAMD64VPADDW512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVWBMasked256Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPADDWMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
+               return true
+       }
+       // match: (VPBLENDMWMasked512 dst (VPAVGW512 x y) mask)
+       // result: (VPAVGWMasked512Merging dst x y mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPAVGW512 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPAVGWMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
        // match: (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask)
@@ -41872,47 +42224,46 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask)
-       // result: (VPSHLDWMasked512Merging dst [a] x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask)
+       // result: (VPMADDWDMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDW512 {
+               if v_1.Op != OpAMD64VPMADDWD512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMADDWDMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask)
-       // result: (VPMULHUWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask)
+       // result: (VPMAXSWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULHUW512 {
+               if v_1.Op != OpAMD64VPMAXSW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMULHUWMasked512Merging)
+               v.reset(OpAMD64VPMAXSWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask)
-       // result: (VPMOVUSWBMasked256Merging dst x mask)
+       // match: (VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask)
+       // result: (VPMAXUWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSWB256 {
+               if v_1.Op != OpAMD64VPMAXUW512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVUSWBMasked256Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPMAXUWMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
        // match: (VPBLENDMWMasked512 dst (VPMINSW512 x y) mask)
@@ -41929,121 +42280,161 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask)
-       // result: (VPSRAVWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPMINUW512 x y) mask)
+       // result: (VPMINUWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVW512 {
+               if v_1.Op != OpAMD64VPMINUW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSRAVWMasked512Merging)
+               v.reset(OpAMD64VPMINUWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPADDW512 x y) mask)
-       // result: (VPADDWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask)
+       // result: (VPMOVSWBMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDW512 {
+               if v_1.Op != OpAMD64VPMOVSWB256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPADDWMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               v.reset(OpAMD64VPMOVSWBMasked256Merging)
+               v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask)
-       // result: (VPSHUFHWMasked512Merging dst [a] x mask)
+       // match: (VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask)
+       // result: (VPMOVUSWBMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHUFHW512 {
+               if v_1.Op != OpAMD64VPMOVUSWB256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSHUFHWMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMOVUSWBMasked256Merging)
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask)
-       // result: (VPSHRDWMasked512Merging dst [a] x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPMOVWB256 x) mask)
+       // result: (VPMOVWBMasked256Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHRDW512 {
+               if v_1.Op != OpAMD64VPMOVWB256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked512Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v.AddArg4(dst, x, y, mask)
-               return true
+               v.reset(OpAMD64VPMOVWBMasked256Merging)
+               v.AddArg3(dst, x, mask)
+               return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask)
-       // result: (VPSUBSWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask)
+       // result: (VPMULHUWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBSW512 {
+               if v_1.Op != OpAMD64VPMULHUW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSUBSWMasked512Merging)
+               v.reset(OpAMD64VPMULHUWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask)
-       // result: (VPSUBUSWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPMULHW512 x y) mask)
+       // result: (VPMULHWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBUSW512 {
+               if v_1.Op != OpAMD64VPMULHW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSUBUSWMasked512Merging)
+               v.reset(OpAMD64VPMULHWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask)
-       // result: (VPSUBWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPMULLW512 x y) mask)
+       // result: (VPMULLWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBW512 {
+               if v_1.Op != OpAMD64VPMULLW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSUBWMasked512Merging)
+               v.reset(OpAMD64VPMULLWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask)
-       // result: (VPMADDWDMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask)
+       // result: (VPOPCNTWMasked512Merging dst x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMADDWD512 {
+               if v_1.Op != OpAMD64VPOPCNTW512 {
                        break
                }
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPOPCNTWMasked512Merging)
+               v.AddArg3(dst, x, mask)
+               return true
+       }
+       // match: (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask)
+       // result: (VPSHLDWMasked512Merging dst [a] x y mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPSHLDW512 {
+                       break
+               }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMADDWDMasked512Merging)
+               v.reset(OpAMD64VPSHLDWMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg4(dst, x, y, mask)
+               return true
+       }
+       // match: (VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask)
+       // result: (VPSHRDWMasked512Merging dst [a] x y mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPSHRDW512 {
+                       break
+               }
+               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPSHRDWMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v.AddArg4(dst, x, y, mask)
                return true
        }
+       // match: (VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask)
+       // result: (VPSHUFHWMasked512Merging dst [a] x mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPSHUFHW512 {
+                       break
+               }
+               a := auxIntToUint8(v_1.AuxInt)
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPSHUFHWMasked512Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v.AddArg3(dst, x, mask)
+               return true
+       }
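
The rules for immediate-operand instructions just above (VPSHLDW512 [a], VPSHRDW512 [a], VPSHUFHW512 [a]) add one step to the template: the 8-bit immediate is copied from the matched op onto the new masked op via v.AuxInt = uint8ToAuxInt(a). A hedged sketch of that extra step, using the same hypothetical toy types as the earlier example but repeated so it stands alone:

// Toy model: folding a blend over an immediate-shift op.
// The immediate must travel with the op.
package main

import "fmt"

type Value struct {
	Op     string
	AuxInt uint8 // stands in for the real int64 AuxInt
	Args   []*Value
}

func rewriteBlendShift(v *Value) bool {
	if v.Op != "Blend" {
		return false
	}
	dst, inner, mask := v.Args[0], v.Args[1], v.Args[2]
	if inner.Op != "ShiftConst" {
		return false
	}
	x := inner.Args[0]
	v.Op = "MaskedShiftConstMerging"
	v.AuxInt = inner.AuxInt // carry the immediate [a] across
	v.Args = []*Value{dst, x, mask}
	return true
}

func main() {
	v := &Value{Op: "Blend", Args: []*Value{
		{Op: "dst"},
		{Op: "ShiftConst", AuxInt: 3, Args: []*Value{{Op: "x"}}},
		{Op: "mask"},
	}}
	fmt.Println(rewriteBlendShift(v), v.Op, v.AuxInt) // true MaskedShiftConstMerging 3
}
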
        // match: (VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask)
        // result: (VPSLLVWMasked512Merging dst x y mask)
        for {
@@ -42058,19 +42449,35 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool {
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPABSW512 x) mask)
-       // result: (VPABSWMasked512Merging dst x mask)
+       // match: (VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask)
+       // result: (VPSLLWMasked512constMerging dst [a] x mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSW512 {
+               if v_1.Op != OpAMD64VPSLLW512const {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPABSWMasked512Merging)
+               v.reset(OpAMD64VPSLLWMasked512constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
                v.AddArg3(dst, x, mask)
                return true
        }
+       // match: (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask)
+       // result: (VPSRAVWMasked512Merging dst x y mask)
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPSRAVW512 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               v.reset(OpAMD64VPSRAVWMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
+               return true
+       }
        // match: (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask)
        // result: (VPSRAWMasked512constMerging dst [a] x mask)
        for {
@@ -42086,158 +42493,584 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool {
                v.AddArg3(dst, x, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask)
-       // result: (VPADDUSWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask)
+       // result: (VPSRLVWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDUSW512 {
+               if v_1.Op != OpAMD64VPSRLVW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPADDUSWMasked512Merging)
+               v.reset(OpAMD64VPSRLVWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask)
-       // result: (VPOPCNTWMasked512Merging dst x mask)
+       // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask)
+       // result: (VPSUBSWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTW512 {
+               if v_1.Op != OpAMD64VPSUBSW512 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPOPCNTWMasked512Merging)
-               v.AddArg3(dst, x, mask)
+               v.reset(OpAMD64VPSUBSWMasked512Merging)
+               v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMINUW512 x y) mask)
-       // result: (VPMINUWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask)
+       // result: (VPSUBUSWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUW512 {
+               if v_1.Op != OpAMD64VPSUBUSW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMINUWMasked512Merging)
+               v.reset(OpAMD64VPSUBUSWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPAVGW512 x y) mask)
-       // result: (VPAVGWMasked512Merging dst x y mask)
+       // match: (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask)
+       // result: (VPSUBWMasked512Merging dst x y mask)
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPAVGW512 {
+               if v_1.Op != OpAMD64VPSUBW512 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPAVGWMasked512Merging)
+               v.reset(OpAMD64VPSUBWMasked512Merging)
                v.AddArg4(dst, x, y, mask)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask)
-       // result: (VPMOVSWBMasked256Merging dst x mask)
+       return false
+}
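
The VPBLENDVB128 rules below differ from the VPBLENDM* ones in two ways, both visible in each rule: the fold is guarded by v.Block.CPUfeatures.hasFeature(CPUavx512), since VPBLENDVB is an AVX instruction while the masked merging forms require AVX-512, and the byte-vector mask must first be moved into a mask register by inserting a new VPMOVVec*ToM value (b.NewValue0(...)). A self-contained toy sketch of those two extra steps, again with hypothetical types rather than the real ssa package:

// Toy model of the feature-gated, mask-converting fold:
// (BlendVB dst (AddPD x y) mask), AVX-512 available
//   => (MaskedAddPDMerging dst x y (VecToM mask))
package main

import "fmt"

type Value struct {
	Op   string
	Args []*Value
}

func rewriteBlendVB(v *Value, hasAVX512 bool) bool {
	if v.Op != "BlendVB" {
		return false
	}
	dst, inner, mask := v.Args[0], v.Args[1], v.Args[2]
	if inner.Op != "AddPD" {
		return false
	}
	x, y := inner.Args[0], inner.Args[1]
	if !hasAVX512 { // cond: the masked merging form needs AVX-512
		return false
	}
	// New value: convert the vector-register mask to a k-register mask,
	// mirroring v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, ...).
	kmask := &Value{Op: "VecToM", Args: []*Value{mask}}
	v.Op = "MaskedAddPDMerging"
	v.Args = []*Value{dst, x, y, kmask}
	return true
}

func main() {
	v := &Value{Op: "BlendVB", Args: []*Value{
		{Op: "dst"},
		{Op: "AddPD", Args: []*Value{{Op: "x"}, {Op: "y"}}},
		{Op: "mask"},
	}}
	fmt.Println(rewriteBlendVB(v, true), v.Op) // true MaskedAddPDMerging
}

The choice of conversion op in the real rules tracks the lane shape of the matched instruction (VPMOVVec64x2ToM for two 64-bit lanes, VPMOVVec32x4ToM for four 32-bit lanes, and so on), so each rule pairs its op with the matching mask width.
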
+func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (VPBLENDVB128 dst (VADDPD128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSWB256 {
+               if v_1.Op != OpAMD64VADDPD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMOVSWBMasked256Merging)
-               v.AddArg3(dst, x, mask)
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VADDPDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask)
-       // result: (VPMAXUWMasked512Merging dst x y mask)
+       // match: (VPBLENDVB128 dst (VADDPS128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUW512 {
+               if v_1.Op != OpAMD64VADDPS128 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMAXUWMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VADDPSMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask)
-       // result: (VPSRLVWMasked512Merging dst x y mask)
+       // match: (VPBLENDVB128 dst (VBROADCASTSD256 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVW512 {
+               if v_1.Op != OpAMD64VBROADCASTSD256 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VBROADCASTSDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VBROADCASTSD512 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VBROADCASTSD512 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VBROADCASTSDMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VBROADCASTSS128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VBROADCASTSS128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VBROADCASTSSMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VBROADCASTSS256 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VBROADCASTSS256 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VBROADCASTSSMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VBROADCASTSS512 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VBROADCASTSS512 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VBROADCASTSSMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VCVTPS2UDQ128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VCVTPS2UDQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VCVTTPS2DQ128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VCVTTPS2DQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VDIVPD128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VDIVPD128 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSRLVWMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VDIVPDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask)
-       // result: (VPSLLWMasked512constMerging dst [a] x mask)
+       // match: (VPBLENDVB128 dst (VDIVPS128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLW512const {
+               if v_1.Op != OpAMD64VDIVPS128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v.AddArg3(dst, x, mask)
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VDIVPSMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPADDSW512 x y) mask)
-       // result: (VPADDSWMasked512Merging dst x y mask)
+       // match: (VPBLENDVB128 dst (VGF2P8MULB128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDSW512 {
+               if v_1.Op != OpAMD64VGF2P8MULB128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VGF2P8MULBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VMAXPD128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VMAXPD128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VMAXPDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VMAXPS128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VMAXPS128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VMAXPSMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VMINPD128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VMINPD128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VMINPDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VMINPS128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VMINPS128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VMINPSMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VMULPD128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VMULPD128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VMULPDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VMULPS128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VMULPS128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VMULPSMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPABSB128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPABSB128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPABSBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPABSD128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPABSD128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPABSDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPABSQ128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPABSQ128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPABSQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPABSW128 x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPABSW128 {
+                       break
+               }
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPABSWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPACKSSDW128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPACKSSDW128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPACKSSDWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPACKUSDW128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPACKUSDW128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPACKUSDWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPADDB128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPADDB128 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPADDSWMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPADDBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDMWMasked512 dst (VPMULLW512 x y) mask)
-       // result: (VPMULLWMasked512Merging dst x y mask)
+       // match: (VPBLENDVB128 dst (VPADDD128 x y) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULLW512 {
+               if v_1.Op != OpAMD64VPADDD128 {
                        break
                }
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
-               v.reset(OpAMD64VPMULLWMasked512Merging)
-               v.AddArg4(dst, x, y, mask)
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPADDDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (VPBLENDVB128 dst (VPMINUD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPADDQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUD128 {
+               if v_1.Op != OpAMD64VPADDQ128 {
                        break
                }
                y := v_1.Args[1]
@@ -42246,39 +43079,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINUDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPROLQ128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPADDSB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLQ128 {
+               if v_1.Op != OpAMD64VPADDSB128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPROLQMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDSBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMADDUBSW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPADDSW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMADDUBSW128 {
+               if v_1.Op != OpAMD64VPADDSW128 {
                        break
                }
                y := v_1.Args[1]
@@ -42287,18 +43119,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMADDUBSWMasked128Merging)
+               v.reset(OpAMD64VPADDSWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMAXSB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPADDUSB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSB128 {
+               if v_1.Op != OpAMD64VPADDUSB128 {
                        break
                }
                y := v_1.Args[1]
@@ -42307,18 +43139,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXSBMasked128Merging)
+               v.reset(OpAMD64VPADDUSBMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPADDSB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPADDUSW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDSB128 {
+               if v_1.Op != OpAMD64VPADDUSW128 {
                        break
                }
                y := v_1.Args[1]
@@ -42327,56 +43159,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDSBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDUSWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VBROADCASTSS256 x) mask)
+       // match: (VPBLENDVB128 dst (VPADDW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VBROADCASTSS256 {
+               if v_1.Op != OpAMD64VPADDW128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VBROADCASTSSMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXBW128 x) mask)
+       // match: (VPBLENDVB128 dst (VPAVGB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBW128 {
+               if v_1.Op != OpAMD64VPAVGB128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBWMasked128Merging)
+               v.reset(OpAMD64VPAVGBMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMINSQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPAVGW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSQ128 {
+               if v_1.Op != OpAMD64VPAVGW128 {
                        break
                }
                y := v_1.Args[1]
@@ -42385,38 +43219,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINSQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPAVGWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSUBUSW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTB128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBUSW128 {
+               if v_1.Op != OpAMD64VPBROADCASTB128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBUSWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXBQ512 x) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTB256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBQ512 {
+               if v_1.Op != OpAMD64VPBROADCASTB256 {
                        break
                }
                x := v_1.Args[0]
@@ -42424,18 +43257,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBQMasked512Merging)
+               v.reset(OpAMD64VPBROADCASTBMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXWQ256 x) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTB512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXWQ256 {
+               if v_1.Op != OpAMD64VPBROADCASTB512 {
                        break
                }
                x := v_1.Args[0]
@@ -42443,60 +43276,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXWQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTBMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMULLW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULLW128 {
+               if v_1.Op != OpAMD64VPBROADCASTD128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULLWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDQ128 {
+               if v_1.Op != OpAMD64VPBROADCASTD256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHLDQMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXBQ256 x) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTD512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBQ256 {
+               if v_1.Op != OpAMD64VPBROADCASTD512 {
                        break
                }
                x := v_1.Args[0]
@@ -42504,38 +43333,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTDMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMAXSQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSQ128 {
+               if v_1.Op != OpAMD64VPBROADCASTQ128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXSQMasked128Merging)
+               v.reset(OpAMD64VPBROADCASTQMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPOPCNTW128 x) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTW128 {
+               if v_1.Op != OpAMD64VPBROADCASTQ256 {
                        break
                }
                x := v_1.Args[0]
@@ -42543,18 +43371,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPOPCNTWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTW128 x) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTQ512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTW128 {
+               if v_1.Op != OpAMD64VPBROADCASTQ512 {
                        break
                }
                x := v_1.Args[0]
@@ -42562,38 +43390,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTQMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPRORVD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTW128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORVD128 {
+               if v_1.Op != OpAMD64VPBROADCASTW128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPRORVDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
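        // The VPMOVVec<lanes>ToM value wrapping the mask mirrors the element
        // shape of the op being masked: 8x16 for byte ops, 16x8 for word ops
        // (as in the VPBROADCASTW rule above), 32x4 for dword ops, and 64x2
        // for qword ops, giving one predicate bit per lane of the resulting
        // masked-merging instruction.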
-       // match: (VPBLENDVB128 dst (VBROADCASTSD256 x) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTW256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VBROADCASTSD256 {
+               if v_1.Op != OpAMD64VPBROADCASTW256 {
                        break
                }
                x := v_1.Args[0]
@@ -42601,18 +43428,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VBROADCASTSDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXDQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPBROADCASTW512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXDQ128 {
+               if v_1.Op != OpAMD64VPBROADCASTW512 {
                        break
                }
                x := v_1.Args[0]
@@ -42620,78 +43447,76 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXDQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPBROADCASTWMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPLZCNTD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAQ128const {
+               if v_1.Op != OpAMD64VPLZCNTD128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAQMasked128constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPLZCNTDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPACKUSDW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPLZCNTQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPACKUSDW128 {
+               if v_1.Op != OpAMD64VPLZCNTQ128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPACKUSDWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPLZCNTQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPLZCNTD128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMADDUBSW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPLZCNTD128 {
+               if v_1.Op != OpAMD64VPMADDUBSW128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPLZCNTDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMADDUBSWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
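        // Rebuild arity follows the matched op: unary sources such as the
        // broadcasts above are reattached with v.AddArg3(dst, x, v0), while
        // two-operand ops such as VPMADDUBSW keep their second argument and
        // use v.AddArg4(dst, x, y, v0). In both shapes v0 is the mask
        // conversion created via b.NewValue0 at v.Pos.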
-       // match: (VPBLENDVB128 dst (VPMAXUD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMADDWD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUD128 {
+               if v_1.Op != OpAMD64VPMADDWD128 {
                        break
                }
                y := v_1.Args[1]
@@ -42700,56 +43525,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXUDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMADDWDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPOPCNTB128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMAXSB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTB128 {
+               if v_1.Op != OpAMD64VPMAXSB128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPOPCNTBMasked128Merging)
+               v.reset(OpAMD64VPMAXSBMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VBROADCASTSD512 x) mask)
+       // match: (VPBLENDVB128 dst (VPMAXSD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VBROADCASTSD512 {
+               if v_1.Op != OpAMD64VPMAXSD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VBROADCASTSDMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXSDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VMINPD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMAXSQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMINPD128 {
+               if v_1.Op != OpAMD64VPMAXSQ128 {
                        break
                }
                y := v_1.Args[1]
@@ -42758,40 +43585,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMINPDMasked128Merging)
+               v.reset(OpAMD64VPMAXSQMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask)
+       // match: (VPBLENDVB128 dst (VPMAXSW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHRDW128 {
+               if v_1.Op != OpAMD64VPMAXSW128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHRDWMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMAXSWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VADDPD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMAXUB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VADDPD128 {
+               if v_1.Op != OpAMD64VPMAXUB128 {
                        break
                }
                y := v_1.Args[1]
@@ -42800,56 +43625,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VADDPDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXUBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXWD256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMAXUD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXWD256 {
+               if v_1.Op != OpAMD64VPMAXUD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXWDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXUDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMAXUQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXWQ256 {
+               if v_1.Op != OpAMD64VPMAXUQ128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXWQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXUQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSUBSW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMAXUW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBSW128 {
+               if v_1.Op != OpAMD64VPMAXUW128 {
                        break
                }
                y := v_1.Args[1]
@@ -42858,79 +43685,78 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBSWMasked128Merging)
+               v.reset(OpAMD64VPMAXUWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPMINSB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VREDUCEPD128 {
+               if v_1.Op != OpAMD64VPMINSB128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VREDUCEPDMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMINSBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXBD256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMINSD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBD256 {
+               if v_1.Op != OpAMD64VPMINSD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMINSDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPRORQ128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPMINSQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORQ128 {
+               if v_1.Op != OpAMD64VPMINSQ128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPRORQMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMINSQMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSLLVW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMINSW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLVW128 {
+               if v_1.Op != OpAMD64VPMINSW128 {
                        break
                }
                y := v_1.Args[1]
@@ -42939,37 +43765,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLVWMasked128Merging)
+               v.reset(OpAMD64VPMINSWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXBW256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMINUB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBW256 {
+               if v_1.Op != OpAMD64VPMINUB128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBWMasked256Merging)
+               v.reset(OpAMD64VPMINUBMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMINSD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMINUD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSD128 {
+               if v_1.Op != OpAMD64VPMINUD128 {
                        break
                }
                y := v_1.Args[1]
@@ -42978,18 +43805,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINSDMasked128Merging)
+               v.reset(OpAMD64VPMINUDMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VADDPS128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMINUQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VADDPS128 {
+               if v_1.Op != OpAMD64VPMINUQ128 {
                        break
                }
                y := v_1.Args[1]
@@ -42998,37 +43825,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VADDPSMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMINUQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXBD256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMINUW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBD256 {
+               if v_1.Op != OpAMD64VPMINUW128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMINUWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXDQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVDB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXDQ128 {
+               if v_1.Op != OpAMD64VPMOVDB128_128 {
                        break
                }
                x := v_1.Args[0]
@@ -43036,138 +43864,132 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXDQMasked128Merging)
+               v.reset(OpAMD64VPMOVDBMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
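        // The _128 suffix on VPMOVDB128_128 (and on the _256/_512 siblings)
        // appears to record the source vector width: here the VPMOVVec32x4ToM
        // mask implies four dword lanes, i.e. a 128-bit source narrowing into
        // a 128-bit destination. Treat this reading of the suffix as an
        // assumption about simdgen's disambiguated naming, not as documented
        // behavior.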
-       // match: (VPBLENDVB128 dst (VPROLVD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVDW128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLVD128 {
+               if v_1.Op != OpAMD64VPMOVDW128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPROLVDMasked128Merging)
+               v.reset(OpAMD64VPMOVDWMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRLVQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVQB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVQ128 {
+               if v_1.Op != OpAMD64VPMOVQB128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRLVQMasked128Merging)
+               v.reset(OpAMD64VPMOVQBMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMAXSD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVQD128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSD128 {
+               if v_1.Op != OpAMD64VPMOVQD128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXSDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVQDMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMINUB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVQW128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUB128 {
+               if v_1.Op != OpAMD64VPMOVQW128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINUBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVQWMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMULLQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSDB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULLQ128 {
+               if v_1.Op != OpAMD64VPMOVSDB128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULLQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSDBMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSUBD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSDW128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBD128 {
+               if v_1.Op != OpAMD64VPMOVSDW128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBDMasked128Merging)
+               v.reset(OpAMD64VPMOVSDWMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTD512 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSQB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTD512 {
+               if v_1.Op != OpAMD64VPMOVSQB128_128 {
                        break
                }
                x := v_1.Args[0]
@@ -43175,120 +43997,113 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTDMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSQBMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMADDWD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSQD128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMOVSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMADDWD128 {
+               if v_1.Op != OpAMD64VPMOVSQD128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMADDWDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSQDMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPROLD128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSQW128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLD128 {
+               if v_1.Op != OpAMD64VPMOVSQW128_128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPROLDMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSQWMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRAD128const [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSWB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAD128const {
+               if v_1.Op != OpAMD64VPMOVSWB128_128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRADMasked128constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSWBMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSUBUSB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXBD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBUSB128 {
+               if v_1.Op != OpAMD64VPMOVSXBD128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBUSBMasked128Merging)
+               v.reset(OpAMD64VPMOVSXBDMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPADDUSB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXBD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDUSB128 {
+               if v_1.Op != OpAMD64VPMOVSXBD256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDUSBMasked128Merging)
+               v.reset(OpAMD64VPMOVSXBDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXBW128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXBD512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBW128 {
+               if v_1.Op != OpAMD64VPMOVSXBD512 {
                        break
                }
                x := v_1.Args[0]
@@ -43296,18 +44111,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBWMasked128Merging)
+               v.reset(OpAMD64VPMOVSXBDMasked512Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXDQ256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXBQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXDQ256 {
+               if v_1.Op != OpAMD64VPMOVSXBQ128 {
                        break
                }
                x := v_1.Args[0]
@@ -43315,98 +44130,94 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXDQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXBQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPROLVQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLVQ128 {
+               if v_1.Op != OpAMD64VPMOVSXBQ256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPROLVQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXBQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPADDB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXBQ512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDB128 {
+               if v_1.Op != OpAMD64VPMOVSXBQ512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDBMasked128Merging)
+               v.reset(OpAMD64VPMOVSXBQMasked512Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPADDQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXBW128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDQ128 {
+               if v_1.Op != OpAMD64VPMOVSXBW128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXBWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPADDUSW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXBW256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDUSW128 {
+               if v_1.Op != OpAMD64VPMOVSXBW256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDUSWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXBWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTB128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXDQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTB128 {
+               if v_1.Op != OpAMD64VPMOVSXDQ128 {
                        break
                }
                x := v_1.Args[0]
@@ -43414,140 +44225,132 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXDQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXDQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRNDSCALEPS128 {
+               if v_1.Op != OpAMD64VPMOVSXDQ256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VRNDSCALEPSMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMOVSXDQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMINUW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXWD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUW128 {
+               if v_1.Op != OpAMD64VPMOVSXWD128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINUWMasked128Merging)
+               v.reset(OpAMD64VPMOVSXWDMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMINSW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXWD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSW128 {
+               if v_1.Op != OpAMD64VPMOVSXWD256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINSWMasked128Merging)
+               v.reset(OpAMD64VPMOVSXWDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMULLD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULLD128 {
+               if v_1.Op != OpAMD64VPMOVSXWQ128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULLDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXWQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSHUFB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHUFB128 {
+               if v_1.Op != OpAMD64VPMOVSXWQ256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHUFBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXWQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPRORD128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORD128 {
+               if v_1.Op != OpAMD64VPMOVSXWQ512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPRORDMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXWQMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VCVTTPS2DQ128 {
+               if v_1.Op != OpAMD64VPMOVUSDB128_128 {
                        break
                }
                x := v_1.Args[0]
@@ -43555,58 +44358,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VCVTTPS2DQMasked128Merging)
+               v.reset(OpAMD64VPMOVUSDBMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VMINPS128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMINPS128 {
+               if v_1.Op != OpAMD64VPMOVUSDW128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMINPSMasked128Merging)
+               v.reset(OpAMD64VPMOVUSDWMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VSUBPD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSUBPD128 {
+               if v_1.Op != OpAMD64VPMOVUSQB128_128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSUBPDMasked128Merging)
+               v.reset(OpAMD64VPMOVUSQBMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTB512 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTB512 {
+               if v_1.Op != OpAMD64VPMOVUSQD128_128 {
                        break
                }
                x := v_1.Args[0]
@@ -43614,18 +44415,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTBMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVUSQDMasked128_128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VRCP14PD128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRCP14PD128 {
+               if v_1.Op != OpAMD64VPMOVUSQW128_128 {
                        break
                }
                x := v_1.Args[0]
@@ -43633,18 +44434,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VRCP14PDMasked128Merging)
+               v.reset(OpAMD64VPMOVUSQWMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXWD256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXWD256 {
+               if v_1.Op != OpAMD64VPMOVUSWB128_128 {
                        break
                }
                x := v_1.Args[0]
@@ -43652,18 +44453,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXWDMasked256Merging)
+               v.reset(OpAMD64VPMOVUSWBMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTW256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTW256 {
+               if v_1.Op != OpAMD64VPMOVWB128_128 {
                        break
                }
                x := v_1.Args[0]
@@ -43671,18 +44472,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTWMasked256Merging)
+               v.reset(OpAMD64VPMOVWBMasked128_128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTD256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXBD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTD256 {
+               if v_1.Op != OpAMD64VPMOVZXBD128 {
                        break
                }
                x := v_1.Args[0]
@@ -43690,38 +44491,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXBDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPADDD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXBD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDD128 {
+               if v_1.Op != OpAMD64VPMOVZXBD256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXBDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VBROADCASTSS128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXBD512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VBROADCASTSS128 {
+               if v_1.Op != OpAMD64VPMOVZXBD512 {
                        break
                }
                x := v_1.Args[0]
@@ -43729,18 +44529,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VBROADCASTSSMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXBDMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXDQ256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXDQ256 {
+               if v_1.Op != OpAMD64VPMOVZXBQ128 {
                        break
                }
                x := v_1.Args[0]
@@ -43748,18 +44548,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXDQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXBQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXBD512 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXBQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBD512 {
+               if v_1.Op != OpAMD64VPMOVZXBQ256 {
                        break
                }
                x := v_1.Args[0]
@@ -43767,102 +44567,94 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBDMasked512Merging)
+               v.reset(OpAMD64VPMOVZXBQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXBQ512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDW128 {
+               if v_1.Op != OpAMD64VPMOVZXBQ512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHLDWMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXBQMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMAXUQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXBW128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUQ128 {
+               if v_1.Op != OpAMD64VPMOVZXBW128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXUQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXBWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXBW256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDD128 {
+               if v_1.Op != OpAMD64VPMOVZXBW256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHLDDMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXBWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VSUBPS128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXDQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSUBPS128 {
+               if v_1.Op != OpAMD64VPMOVZXDQ128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSUBPSMasked128Merging)
+               v.reset(OpAMD64VPMOVZXDQMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXDQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTQ128 {
+               if v_1.Op != OpAMD64VPMOVZXDQ256 {
                        break
                }
                x := v_1.Args[0]
@@ -43870,38 +44662,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXDQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPRORVQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXWD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORVQ128 {
+               if v_1.Op != OpAMD64VPMOVZXWD128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPRORVQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXWDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VBROADCASTSS512 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXWD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VBROADCASTSS512 {
+               if v_1.Op != OpAMD64VPMOVZXWD256 {
                        break
                }
                x := v_1.Args[0]
@@ -43909,18 +44700,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VBROADCASTSSMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXWDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXBD128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBD128 {
+               if v_1.Op != OpAMD64VPMOVZXWQ128 {
                        break
                }
                x := v_1.Args[0]
@@ -43928,18 +44719,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXWQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXWQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBQ128 {
+               if v_1.Op != OpAMD64VPMOVZXWQ256 {
                        break
                }
                x := v_1.Args[0]
@@ -43947,18 +44738,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXWQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPABSW128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMOVZXWQ512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSW128 {
+               if v_1.Op != OpAMD64VPMOVZXWQ512 {
                        break
                }
                x := v_1.Args[0]
@@ -43966,18 +44757,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPABSWMasked128Merging)
+               v.reset(OpAMD64VPMOVZXWQMasked512Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSUBW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMULHUW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBW128 {
+               if v_1.Op != OpAMD64VPMULHUW128 {
                        break
                }
                y := v_1.Args[1]
@@ -43986,37 +44777,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBWMasked128Merging)
+               v.reset(OpAMD64VPMULHUWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMULHW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXWQ128 {
+               if v_1.Op != OpAMD64VPMULHW128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXWQMasked128Merging)
+               v.reset(OpAMD64VPMULHWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VGF2P8MULB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPMULLD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VGF2P8MULB128 {
+               if v_1.Op != OpAMD64VPMULLD128 {
                        break
                }
                y := v_1.Args[1]
@@ -44025,195 +44817,196 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VGF2P8MULBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMULLDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPABSD128 x) mask)
+       // match: (VPBLENDVB128 dst (VPMULLQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSD128 {
+               if v_1.Op != OpAMD64VPMULLQ128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPABSDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMULLQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTB256 x) mask)
+       // match: (VPBLENDVB128 dst (VPMULLW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTB256 {
+               if v_1.Op != OpAMD64VPMULLW128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMULLWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VMAXPD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPOPCNTB128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMAXPD128 {
+               if v_1.Op != OpAMD64VPOPCNTB128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMAXPDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPOPCNTBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMINUQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPOPCNTD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUQ128 {
+               if v_1.Op != OpAMD64VPOPCNTD128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINUQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPOPCNTDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VMULPS128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPOPCNTQ128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMULPS128 {
+               if v_1.Op != OpAMD64VPOPCNTQ128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMULPSMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPOPCNTQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMULHUW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPOPCNTW128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULHUW128 {
+               if v_1.Op != OpAMD64VPOPCNTW128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULHUWMasked128Merging)
+               v.reset(OpAMD64VPOPCNTWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VMULPD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPROLD128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMULPD128 {
+               if v_1.Op != OpAMD64VPROLD128 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMULPDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPROLDMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPROLQ128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBB128 {
+               if v_1.Op != OpAMD64VPROLQ128 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPROLQMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPROLVD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VCVTPS2UDQ128 {
+               if v_1.Op != OpAMD64VPROLVD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VCVTPS2UDQMasked128Merging)
+               v.reset(OpAMD64VPROLVDMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VSCALEFPS128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPROLVQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSCALEFPS128 {
+               if v_1.Op != OpAMD64VPROLVQ128 {
                        break
                }
                y := v_1.Args[1]
@@ -44222,57 +45015,60 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSCALEFPSMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPROLVQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSLLVQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPRORD128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLVQ128 {
+               if v_1.Op != OpAMD64VPRORD128 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLVQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPRORDMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask)
+       // match: (VPBLENDVB128 dst (VPRORQ128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBQ256 {
+               if v_1.Op != OpAMD64VPRORQ128 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPRORQMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPADDW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPRORVD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDW128 {
+               if v_1.Op != OpAMD64VPRORVD128 {
                        break
                }
                y := v_1.Args[1]
@@ -44281,86 +45077,93 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPRORVDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXWD128 x) mask)
+       // match: (VPBLENDVB128 dst (VPRORVQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXWD128 {
+               if v_1.Op != OpAMD64VPRORVQ128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXWDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPRORVQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VRSQRT14PD128 x) mask)
+       // match: (VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRSQRT14PD128 {
+               if v_1.Op != OpAMD64VPSHLDD128 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VRSQRT14PDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHLDDMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAW128const {
+               if v_1.Op != OpAMD64VPSHLDQ128 {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAWMasked128constMerging)
+               v.reset(OpAMD64VPSHLDQMasked128Merging)
                v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMULHW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULHW128 {
+               if v_1.Op != OpAMD64VPSHLDW128 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULHWMasked128Merging)
+               v.reset(OpAMD64VPSHLDWMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
@@ -44388,52 +45191,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSUBSB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBSB128 {
+               if v_1.Op != OpAMD64VPSHRDQ128 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBSBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHRDQMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPADDSW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDSW128 {
+               if v_1.Op != OpAMD64VPSHRDW128 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDSWMasked128Merging)
+               v.reset(OpAMD64VPSHRDWMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMINSB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSHUFB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSB128 {
+               if v_1.Op != OpAMD64VPSHUFB128 {
                        break
                }
                y := v_1.Args[1]
@@ -44442,7 +45249,7 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINSBMasked128Merging)
+               v.reset(OpAMD64VPSHUFBMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
@@ -44469,31 +45276,33 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTQ512 x) mask)
+       // match: (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTQ512 {
+               if v_1.Op != OpAMD64VPSHUFHW128 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTQMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHUFHWMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPSLLD128const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VREDUCEPS128 {
+               if v_1.Op != OpAMD64VPSLLD128const {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
@@ -44502,38 +45311,40 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VREDUCEPSMasked128Merging)
+               v.reset(OpAMD64VPSLLDMasked128constMerging)
                v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXWQ512 x) mask)
+       // match: (VPBLENDVB128 dst (VPSLLQ128const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXWQ512 {
+               if v_1.Op != OpAMD64VPSLLQ128const {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXWQMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSLLQMasked128constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRAVW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSLLVD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVW128 {
+               if v_1.Op != OpAMD64VPSLLVD128 {
                        break
                }
                y := v_1.Args[1]
@@ -44542,37 +45353,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAVWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSLLVDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VSQRTPD128 x) mask)
+       // match: (VPBLENDVB128 dst (VPSLLVQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSQRTPD128 {
+               if v_1.Op != OpAMD64VPSLLVQ128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSQRTPDMasked128Merging)
+               v.reset(OpAMD64VPSLLVQMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPAVGW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSLLVW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPAVGW128 {
+               if v_1.Op != OpAMD64VPSLLVW128 {
                        break
                }
                y := v_1.Args[1]
@@ -44581,96 +45393,141 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPAVGWMasked128Merging)
+               v.reset(OpAMD64VPSLLVWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VDIVPS128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSLLW128const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VDIVPS128 {
+               if v_1.Op != OpAMD64VPSLLW128const {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VDIVPSMasked128Merging)
+               v.reset(OpAMD64VPSLLWMasked128constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(dst, x, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPSRAD128const [a] x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPSRAD128const {
+                       break
+               }
+               a := auxIntToUint8(v_1.AuxInt)
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPSRADMasked128constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VDIVPD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VDIVPD128 {
+               if v_1.Op != OpAMD64VPSRAQ128const {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VDIVPDMasked128Merging)
+               v.reset(OpAMD64VPSRAQMasked128constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPOPCNTD128 x) mask)
+       // match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTD128 {
+               if v_1.Op != OpAMD64VPSRAVD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPOPCNTDMasked128Merging)
+               v.reset(OpAMD64VPSRAVDMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTQ256 x) mask)
+       // match: (VPBLENDVB128 dst (VPSRAVQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTQ256 {
+               if v_1.Op != OpAMD64VPSRAVQ128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTQMasked256Merging)
+               v.reset(OpAMD64VPSRAVQMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPSRAVW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRNDSCALEPD128 {
+               if v_1.Op != OpAMD64VPSRAVW128 {
+                       break
+               }
+               y := v_1.Args[1]
+               x := v_1.Args[0]
+               mask := v_2
+               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
+                       break
+               }
+               v.reset(OpAMD64VPSRAVWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg4(dst, x, y, v0)
+               return true
+       }
+       // match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask)
+       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
+       // result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       for {
+               dst := v_0
+               if v_1.Op != OpAMD64VPSRAW128const {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
@@ -44679,57 +45536,59 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VRNDSCALEPDMasked128Merging)
+               v.reset(OpAMD64VPSRAWMasked128constMerging)
                v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXWQ128 {
+               if v_1.Op != OpAMD64VPSRLVD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXWQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSRLVDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPOPCNTQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPSRLVQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTQ128 {
+               if v_1.Op != OpAMD64VPSRLVQ128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPOPCNTQMasked128Merging)
+               v.reset(OpAMD64VPSRLVQMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPAVGB128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSRLVW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPAVGB128 {
+               if v_1.Op != OpAMD64VPSRLVW128 {
                        break
                }
                y := v_1.Args[1]
@@ -44738,37 +45597,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPAVGBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPSRLVWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXBQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBQ128 {
+               if v_1.Op != OpAMD64VPSUBB128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBQMasked128Merging)
+               v.reset(OpAMD64VPSUBBMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMAXSW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSUBD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSW128 {
+               if v_1.Op != OpAMD64VPSUBD128 {
                        break
                }
                y := v_1.Args[1]
@@ -44777,98 +45637,98 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXSWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXBW256 x) mask)
+       // match: (VPBLENDVB128 dst (VPSUBQ128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBW256 {
+               if v_1.Op != OpAMD64VPSUBQ128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBQMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXBD512 x) mask)
+       // match: (VPBLENDVB128 dst (VPSUBSB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBD512 {
+               if v_1.Op != OpAMD64VPSUBSB128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBDMasked512Merging)
+               v.reset(OpAMD64VPSUBSBMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPSUBSW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHUFHW128 {
+               if v_1.Op != OpAMD64VPSUBSW128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHUFHWMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSUBSWMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSLLW128const [a] x) mask)
+       // match: (VPBLENDVB128 dst (VPSUBUSB128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLW128const {
+               if v_1.Op != OpAMD64VPSUBUSB128 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLWMasked128constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBUSBMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSLLVD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSUBUSW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLVD128 {
+               if v_1.Op != OpAMD64VPSUBUSW128 {
                        break
                }
                y := v_1.Args[1]
@@ -44877,18 +45737,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLVDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBUSWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VPSUBW128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVD128 {
+               if v_1.Op != OpAMD64VPSUBW128 {
                        break
                }
                y := v_1.Args[1]
@@ -44897,18 +45757,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRLVDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBWMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask)
+       // match: (VPBLENDVB128 dst (VRCP14PD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXWQ512 {
+               if v_1.Op != OpAMD64VRCP14PD128 {
                        break
                }
                x := v_1.Args[0]
@@ -44916,38 +45776,39 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXWQMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VRCP14PDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSUBQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBQ128 {
+               if v_1.Op != OpAMD64VREDUCEPD128 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBQMasked128Merging)
+               v.reset(OpAMD64VREDUCEPDMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSLLD128const [a] x) mask)
+       // match: (VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLD128const {
+               if v_1.Op != OpAMD64VREDUCEPS128 {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
@@ -44956,39 +45817,40 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLDMasked128constMerging)
+               v.reset(OpAMD64VREDUCEPSMasked128Merging)
                v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRLVW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVW128 {
+               if v_1.Op != OpAMD64VRNDSCALEPD128 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRLVWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VRNDSCALEPDMasked128Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSLLQ128const [a] x) mask)
+       // match: (VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLQ128const {
+               if v_1.Op != OpAMD64VRNDSCALEPS128 {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
@@ -44997,77 +45859,78 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLQMasked128constMerging)
+               v.reset(OpAMD64VRNDSCALEPSMasked128Merging)
                v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask)
+       // match: (VPBLENDVB128 dst (VRSQRT14PD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVD128 {
+               if v_1.Op != OpAMD64VRSQRT14PD128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAVDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VRSQRT14PDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVSXBD128 x) mask)
+       // match: (VPBLENDVB128 dst (VSCALEFPD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBD128 {
+               if v_1.Op != OpAMD64VSCALEFPD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VSCALEFPDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXBQ512 x) mask)
+       // match: (VPBLENDVB128 dst (VSCALEFPS128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBQ512 {
+               if v_1.Op != OpAMD64VSCALEFPS128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBQMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VSCALEFPSMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPLZCNTQ128 x) mask)
+       // match: (VPBLENDVB128 dst (VSQRTPD128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPLZCNTQ128 {
+               if v_1.Op != OpAMD64VSQRTPD128 {
                        break
                }
                x := v_1.Args[0]
@@ -45075,57 +45938,57 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPLZCNTQMasked128Merging)
+               v.reset(OpAMD64VSQRTPDMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPACKSSDW128 x y) mask)
+       // match: (VPBLENDVB128 dst (VSQRTPS128 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPACKSSDW128 {
+               if v_1.Op != OpAMD64VSQRTPS128 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPACKSSDWMasked128Merging)
+               v.reset(OpAMD64VSQRTPSMasked128Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMOVZXWD128 x) mask)
+       // match: (VPBLENDVB128 dst (VSUBPD128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXWD128 {
+               if v_1.Op != OpAMD64VSUBPD128 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXWDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VSUBPDMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSRAVQ128 x y) mask)
+       // match: (VPBLENDVB128 dst (VSUBPS128 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVQ128 {
+               if v_1.Op != OpAMD64VSUBPS128 {
                        break
                }
                y := v_1.Args[1]
@@ -45134,37 +45997,45 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAVQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VSUBPSMasked128Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTD128 x) mask)
+       return false
+}
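
(The generated rules above all share one shape: a byte blend whose second operand is an unmasked SIMD op is folded, when AVX-512 is available, into that op's masked ".Merging" form, with the blend's dst as the merge source and the vector mask converted to a K-register mask; any [a] immediate on the matched op is carried across via AuxInt. Below is a minimal, self-contained sketch of that transformation on toy SSA values. The type and function names here are illustrative only, not the compiler's real API, and this code is not part of this CL.)

        package main

        import "fmt"

        // Toy stand-ins for SSA ops and values; names are hypothetical.
        type op string

        type value struct {
                op   op
                aux  uint8 // immediate, when the matched op carries one
                args []*value
        }

        // blendToMaskedMerge sketches the shared rule shape:
        // (VPBLENDVB128 dst (OP [a] x ...) mask) with AVX-512 available becomes
        // (OPMasked128Merging dst [a] x ... (VPMOVVec<shape>ToM mask)).
        // The real rules also match the inner op; this sketch assumes it matched.
        func blendToMaskedMerge(v *value, maskedOp, toMask op, hasAVX512 bool) bool {
                if !hasAVX512 {
                        return false
                }
                dst, inner, mask := v.args[0], v.args[1], v.args[2]
                // Convert the vector mask to a K-mask of the inner op's shape.
                m := &value{op: toMask, args: []*value{mask}}
                v.op = maskedOp
                v.aux = inner.aux // thread the [a] immediate through, if any
                v.args = append([]*value{dst}, append(inner.args, m)...)
                return true
        }

        func main() {
                v := &value{op: "VPBLENDVB128", args: []*value{
                        {op: "dst"},
                        {op: "VPSLLW128const", aux: 3, args: []*value{{op: "x"}}},
                        {op: "mask"},
                }}
                blendToMaskedMerge(v, "VPSLLWMasked128constMerging", "VPMOVVec16x8ToM", true)
                fmt.Println(v.op, v.aux, len(v.args)) // VPSLLWMasked128constMerging 3 3
        }

(In the generated rewriter each rule is specialized per op, so the matched op, the masked op, and the mask-conversion op are fixed constants rather than parameters as in this sketch.)
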
+func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (VPBLENDVB256 dst (VADDPD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTD128 {
+               if v_1.Op != OpAMD64VADDPD256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VADDPDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
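
(Besides the op name, the only per-rule variation is the mask-conversion shape: VPMOVVec<W>x<N>ToM must match the inner op's element width and lane count, so VADDPD256 above, operating on 4 64-bit lanes, uses VPMOVVec64x4ToM, while VADDPS256 below, operating on 8 32-bit lanes, uses VPMOVVec32x8ToM. A hypothetical helper showing that correspondence; maskOpFor is illustrative only, as the generator derives this from its op tables:)

        package main

        import "fmt"

        // maskOpFor names the VPMOVVec<width>x<count>ToM op for a vector shape.
        func maskOpFor(elemBits, vecBits int) string {
                return fmt.Sprintf("VPMOVVec%dx%dToM", elemBits, vecBits/elemBits)
        }

        func main() {
                fmt.Println(maskOpFor(64, 256)) // VPMOVVec64x4ToM, as used for VADDPD256
                fmt.Println(maskOpFor(32, 256)) // VPMOVVec32x8ToM, as used for VADDPS256
        }
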
-       // match: (VPBLENDVB128 dst (VMAXPS128 x y) mask)
+       // match: (VPBLENDVB256 dst (VADDPS256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMAXPS128 {
+               if v_1.Op != OpAMD64VADDPS256 {
                        break
                }
                y := v_1.Args[1]
@@ -45173,98 +46044,96 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMAXPSMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VADDPSMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask)
+       // match: (VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHRDQ128 {
+               if v_1.Op != OpAMD64VCVTPS2UDQ256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHRDQMasked128Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VCVTPS2UDQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMAXUW128 x y) mask)
+       // match: (VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUW128 {
+               if v_1.Op != OpAMD64VCVTTPS2DQ256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXUWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VCVTTPS2DQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPABSB128 x) mask)
+       // match: (VPBLENDVB256 dst (VDIVPD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSB128 {
+               if v_1.Op != OpAMD64VDIVPD256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPABSBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VDIVPDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPABSQ128 x) mask)
+       // match: (VPBLENDVB256 dst (VDIVPS256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSQ128 {
+               if v_1.Op != OpAMD64VDIVPS256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPABSQMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VDIVPSMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VSCALEFPD128 x y) mask)
+       // match: (VPBLENDVB256 dst (VGF2P8MULB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // result: (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSCALEFPD128 {
+               if v_1.Op != OpAMD64VGF2P8MULB256 {
                        break
                }
                y := v_1.Args[1]
@@ -45273,56 +46142,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSCALEFPDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VGF2P8MULBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VSQRTPS128 x) mask)
+       // match: (VPBLENDVB256 dst (VMAXPD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // result: (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSQRTPS128 {
+               if v_1.Op != OpAMD64VMAXPD256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSQRTPSMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VMAXPDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPBROADCASTW512 x) mask)
+       // match: (VPBLENDVB256 dst (VMAXPS256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // result: (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPBROADCASTW512 {
+               if v_1.Op != OpAMD64VMAXPS256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPBROADCASTWMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v.reset(OpAMD64VMAXPSMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB128 dst (VPMAXUB128 x y) mask)
+       // match: (VPBLENDVB256 dst (VMINPD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // result: (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUB128 {
+               if v_1.Op != OpAMD64VMINPD256 {
                        break
                }
                y := v_1.Args[1]
@@ -45331,44 +46202,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXUBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v.reset(OpAMD64VMINPDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (VPBLENDVB256 dst (VPMOVSXBW512 x) mask)
+       // match: (VPBLENDVB256 dst (VMINPS256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXBW512 {
+               if v_1.Op != OpAMD64VMINPS256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXBWMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VMINPSMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPADDUSB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VMULPD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDUSB256 {
+               if v_1.Op != OpAMD64VMULPD256 {
                        break
                }
                y := v_1.Args[1]
@@ -45377,8 +46242,8 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDUSBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VMULPDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
@@ -45403,12 +46268,12 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPOPCNTB256 x) mask)
+       // match: (VPBLENDVB256 dst (VPABSB256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTB256 {
+               if v_1.Op != OpAMD64VPABSB256 {
                        break
                }
                x := v_1.Args[0]
@@ -45416,79 +46281,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPOPCNTBMasked256Merging)
+               v.reset(OpAMD64VPABSBMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VSUBPS256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPABSD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSUBPS256 {
+               if v_1.Op != OpAMD64VPABSD256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSUBPSMasked256Merging)
+               v.reset(OpAMD64VPABSDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMAXUQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPABSQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUQ256 {
+               if v_1.Op != OpAMD64VPABSQ256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXUQMasked256Merging)
+               v.reset(OpAMD64VPABSQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPROLD256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPABSW256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLD256 {
+               if v_1.Op != OpAMD64VPABSW256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPROLDMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPABSWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPACKSSDW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVD256 {
+               if v_1.Op != OpAMD64VPACKSSDW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45497,18 +46358,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAVDMasked256Merging)
+               v.reset(OpAMD64VPACKSSDWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VADDPS256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPACKUSDW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VADDPS256 {
+               if v_1.Op != OpAMD64VPACKUSDW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45517,96 +46378,98 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VADDPSMasked256Merging)
+               v.reset(OpAMD64VPACKUSDWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask)
+       // match: (VPBLENDVB256 dst (VPADDB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXDQ512 {
+               if v_1.Op != OpAMD64VPADDB256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXDQMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVUSWB128 x) mask)
+       // match: (VPBLENDVB256 dst (VPADDD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVUSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSWB128 {
+               if v_1.Op != OpAMD64VPADDD256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVUSWBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPADDQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAQ256const {
+               if v_1.Op != OpAMD64VPADDQ256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAQMasked256constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPADDQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
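        // VPADDSB/VPADDSW add with signed saturation; VPADDUSB/VPADDUSW with unsigned saturation.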
-       // match: (VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask)
+       // match: (VPBLENDVB256 dst (VPADDSB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VCVTPS2UDQ256 {
+               if v_1.Op != OpAMD64VPADDSB256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VCVTPS2UDQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDSBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMINSW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPADDSW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSW256 {
+               if v_1.Op != OpAMD64VPADDSW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45615,40 +46478,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINSWMasked256Merging)
+               v.reset(OpAMD64VPADDSWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask)
+       // match: (VPBLENDVB256 dst (VPADDUSB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDD256 {
+               if v_1.Op != OpAMD64VPADDUSB256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHLDDMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDUSBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSLLVW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPADDUSW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLVW256 {
+               if v_1.Op != OpAMD64VPADDUSW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45657,18 +46518,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLVWMasked256Merging)
+               v.reset(OpAMD64VPADDUSWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRLVQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPADDW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVQ256 {
+               if v_1.Op != OpAMD64VPADDW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45677,18 +46538,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRLVQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPADDWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
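        // VPAVGB/VPAVGW compute the rounded unsigned average (x + y + 1) >> 1 per lane.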
-       // match: (VPBLENDVB256 dst (VPSUBUSB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPAVGB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBUSB256 {
+               if v_1.Op != OpAMD64VPAVGB256 {
                        break
                }
                y := v_1.Args[1]
@@ -45697,18 +46558,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBUSBMasked256Merging)
+               v.reset(OpAMD64VPAVGBMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMAXSW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPAVGW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSW256 {
+               if v_1.Op != OpAMD64VPAVGW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45717,58 +46578,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXSWMasked256Merging)
+               v.reset(OpAMD64VPAVGWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
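        // VPLZCNTD/VPLZCNTQ count leading zero bits in each 32-/64-bit lane.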
-       // match: (VPBLENDVB256 dst (VMINPS256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPLZCNTD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMINPS256 {
+               if v_1.Op != OpAMD64VPLZCNTD256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMINPSMasked256Merging)
+               v.reset(OpAMD64VPLZCNTDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMINSD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPLZCNTQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSD256 {
+               if v_1.Op != OpAMD64VPLZCNTQ256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINSDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPLZCNTQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
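        // VPMADDUBSW multiplies unsigned bytes by signed bytes and adds adjacent
        // pairs into saturated signed words; VPMADDWD multiplies signed words and
        // adds adjacent pairs into signed dwords.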
-       // match: (VPBLENDVB256 dst (VPADDSW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMADDUBSW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDSW256 {
+               if v_1.Op != OpAMD64VPMADDUBSW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45777,39 +46636,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDSWMasked256Merging)
+               v.reset(OpAMD64VPMADDUBSWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask)
-       // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               dst := v_0
-               if v_1.Op != OpAMD64VRNDSCALEPS256 {
-                       break
-               }
-               a := auxIntToUint8(v_1.AuxInt)
-               x := v_1.Args[0]
-               mask := v_2
-               if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
-                       break
-               }
-               v.reset(OpAMD64VRNDSCALEPSMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
-               return true
-       }
-       // match: (VPBLENDVB256 dst (VPROLVQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMADDWD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLVQ256 {
+               if v_1.Op != OpAMD64VPMADDWD256 {
                        break
                }
                y := v_1.Args[1]
@@ -45818,18 +46656,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPROLVQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMADDWDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMULHW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMAXSB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULHW256 {
+               if v_1.Op != OpAMD64VPMAXSB256 {
                        break
                }
                y := v_1.Args[1]
@@ -45838,18 +46676,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULHWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXSBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VDIVPD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMAXSD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VDIVPD256 {
+               if v_1.Op != OpAMD64VPMAXSD256 {
                        break
                }
                y := v_1.Args[1]
@@ -45858,37 +46696,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VDIVPDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXSDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPLZCNTQ256 x) mask)
+       // match: (VPBLENDVB256 dst (VPMAXSQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPLZCNTQ256 {
+               if v_1.Op != OpAMD64VPMAXSQ256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPLZCNTQMasked256Merging)
+               v.reset(OpAMD64VPMAXSQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMAXSW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVD256 {
+               if v_1.Op != OpAMD64VPMAXSW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45897,18 +46736,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRLVDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXSWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPADDD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMAXUB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDD256 {
+               if v_1.Op != OpAMD64VPMAXUB256 {
                        break
                }
                y := v_1.Args[1]
@@ -45917,56 +46756,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXUBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVSDW128 x) mask)
+       // match: (VPBLENDVB256 dst (VPMAXUD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSDW128 {
+               if v_1.Op != OpAMD64VPMAXUD256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSDWMasked128Merging)
+               v.reset(OpAMD64VPMAXUDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPOPCNTD256 x) mask)
+       // match: (VPBLENDVB256 dst (VPMAXUQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTD256 {
+               if v_1.Op != OpAMD64VPMAXUQ256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPOPCNTDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMAXUQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPADDUSW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMAXUW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDUSW256 {
+               if v_1.Op != OpAMD64VPMAXUW256 {
                        break
                }
                y := v_1.Args[1]
@@ -45975,58 +46816,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDUSWMasked256Merging)
+               v.reset(OpAMD64VPMAXUWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VSQRTPD256 x) mask)
+       // match: (VPBLENDVB256 dst (VPMINSB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSQRTPD256 {
+               if v_1.Op != OpAMD64VPMINSB256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSQRTPDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMINSBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPMINSD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VREDUCEPS256 {
+               if v_1.Op != OpAMD64VPMINSD256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VREDUCEPSMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMINSDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSUBQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMINSQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBQ256 {
+               if v_1.Op != OpAMD64VPMINSQ256 {
                        break
                }
                y := v_1.Args[1]
@@ -46035,37 +46876,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBQMasked256Merging)
+               v.reset(OpAMD64VPMINSQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVSXWD512 x) mask)
+       // match: (VPBLENDVB256 dst (VPMINSW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSXWD512 {
+               if v_1.Op != OpAMD64VPMINSW256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSXWDMasked512Merging)
+               v.reset(OpAMD64VPMINSWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VGF2P8MULB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMINUB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VGF2P8MULB256 {
+               if v_1.Op != OpAMD64VPMINUB256 {
                        break
                }
                y := v_1.Args[1]
@@ -46074,18 +46916,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VGF2P8MULBMasked256Merging)
+               v.reset(OpAMD64VPMINUBMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSLLVD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMINUD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLVD256 {
+               if v_1.Op != OpAMD64VPMINUD256 {
                        break
                }
                y := v_1.Args[1]
@@ -46094,18 +46936,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLVDMasked256Merging)
+               v.reset(OpAMD64VPMINUDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRLVW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMINUQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRLVW256 {
+               if v_1.Op != OpAMD64VPMINUQ256 {
                        break
                }
                y := v_1.Args[1]
@@ -46114,18 +46956,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRLVWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMINUQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPADDW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMINUW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDW256 {
+               if v_1.Op != OpAMD64VPMINUW256 {
                        break
                }
                y := v_1.Args[1]
@@ -46134,102 +46976,94 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDWMasked256Merging)
+               v.reset(OpAMD64VPMINUWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
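        // VPMOVDB128_256 truncates the eight 32-bit lanes of a 256-bit source to
        // bytes in a 128-bit result; the _256 suffix records the source width,
        // since VPMOVDB from 128-, 256-, or 512-bit sources all produce a
        // 128-bit value.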
-       // match: (VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVDB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMOVDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VREDUCEPD256 {
+               if v_1.Op != OpAMD64VPMOVDB128_256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VREDUCEPDMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVDBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVDW128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMOVDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRNDSCALEPD256 {
+               if v_1.Op != OpAMD64VPMOVDW128_256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VRNDSCALEPDMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVDWMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPRORVD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVQB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORVD256 {
+               if v_1.Op != OpAMD64VPMOVQB128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPRORVDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVQBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVQD128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMOVQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDW256 {
+               if v_1.Op != OpAMD64VPMOVQD128_256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHLDWMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVQDMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVQW128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VCVTTPS2DQ256 {
+               if v_1.Op != OpAMD64VPMOVQW128_256 {
                        break
                }
                x := v_1.Args[0]
@@ -46237,58 +47071,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VCVTTPS2DQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVQWMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
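        // The VPMOVS* forms narrow with signed saturation and the VPMOVUS* forms
        // with unsigned saturation, rather than plain truncation.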
-       // match: (VPBLENDVB256 dst (VSUBPD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSDB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSUBPD256 {
+               if v_1.Op != OpAMD64VPMOVSDB128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSUBPDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSDBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSUBD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSDW128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBD256 {
+               if v_1.Op != OpAMD64VPMOVSDW128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBDMasked256Merging)
+               v.reset(OpAMD64VPMOVSDWMasked128_256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VSQRTPS256 x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSQB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSQRTPS256 {
+               if v_1.Op != OpAMD64VPMOVSQB128_256 {
                        break
                }
                x := v_1.Args[0]
@@ -46296,78 +47128,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSQRTPSMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSQBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPACKUSDW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSQD128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPACKUSDW256 {
+               if v_1.Op != OpAMD64VPMOVSQD128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPACKUSDWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSQDMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMULLD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSQW128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULLD256 {
+               if v_1.Op != OpAMD64VPMOVSQW128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULLDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSQWMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPADDB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSWB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPMOVSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDB256 {
+               if v_1.Op != OpAMD64VPMOVSWB128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSWBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
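        // Widening conversions such as VPMOVSXBW512 need no source-width suffix:
        // a 512-bit destination of sign-extended words implies a 256-bit source.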
-       // match: (VPBLENDVB256 dst (VPMOVWB128 x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSXBW512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVWB128 {
+               if v_1.Op != OpAMD64VPMOVSXBW512 {
                        break
                }
                x := v_1.Args[0]
@@ -46375,38 +47204,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVWBMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXBWMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMADDWD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMADDWD256 {
+               if v_1.Op != OpAMD64VPMOVSXDQ512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMADDWDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXDQMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVQD128 x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVSXWD512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVQD128 {
+               if v_1.Op != OpAMD64VPMOVSXWD512 {
                        break
                }
                x := v_1.Args[0]
@@ -46414,78 +47242,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVQDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVSXWDMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMULHUW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULHUW256 {
+               if v_1.Op != OpAMD64VPMOVUSDB128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULHUWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVUSDBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
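        // NOTE: ops named VPMOV...128_256, as in the rule above, are the
        // 256-bit-source variants of narrowing moves whose result fits in 128
        // bits; the suffix is what selects the mask converter. VPMOVUSDB128_256
        // narrows 8 dword lanes, so it pairs with VPMOVVec32x8ToM (8 lanes of
        // 32 bits), matching the 256-bit source layout rather than the 128-bit
        // result.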
-       // match: (VPBLENDVB256 dst (VPMULLQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULLQ256 {
+               if v_1.Op != OpAMD64VPMOVUSDW128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULLQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVUSDWMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPROLVD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLVD256 {
+               if v_1.Op != OpAMD64VPMOVUSQB128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPROLVDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVUSQBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVUSDW128 x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVUSDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSDW128 {
+               if v_1.Op != OpAMD64VPMOVUSQD128_256 {
                        break
                }
                x := v_1.Args[0]
@@ -46493,159 +47318,152 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVUSDWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVUSQDMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMULLW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMULLW256 {
+               if v_1.Op != OpAMD64VPMOVUSQW128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMULLWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVUSQWMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPRORD256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORD256 {
+               if v_1.Op != OpAMD64VPMOVUSWB128_256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPRORDMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVUSWBMasked128_256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRAVW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVW256 {
+               if v_1.Op != OpAMD64VPMOVWB128_256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAVWMasked256Merging)
+               v.reset(OpAMD64VPMOVWBMasked128_256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMINUD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVZXBW512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUD256 {
+               if v_1.Op != OpAMD64VPMOVZXBW512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINUDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXBWMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHUFD256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHUFD256 {
+               if v_1.Op != OpAMD64VPMOVZXDQ512 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHUFDMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPMOVZXDQMasked512Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSLLVQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMOVZXWD512 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLVQ256 {
+               if v_1.Op != OpAMD64VPMOVZXWD512 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLVQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMOVZXWDMasked512Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVUSQD128 x) mask)
+       // match: (VPBLENDVB256 dst (VPMULHUW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVUSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVUSQD128 {
+               if v_1.Op != OpAMD64VPMULHUW256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVUSQDMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMULHUWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSUBUSW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMULHW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBUSW256 {
+               if v_1.Op != OpAMD64VPMULHW256 {
                        break
                }
                y := v_1.Args[1]
@@ -46654,37 +47472,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBUSWMasked256Merging)
+               v.reset(OpAMD64VPMULHWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
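        // NOTE: two-operand ops such as the VPMULHUW256/VPMULHW256 rules above
        // differ from the unary sketch earlier only in capturing the second
        // operand and passing it through, with the mask still last:
        //
        //      y := v_1.Args[1]        // second operand of the unmasked op
        //      ...
        //      v.AddArg4(dst, x, y, v0) // dst first, operands, then the mask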
-       // match: (VPBLENDVB256 dst (VRSQRT14PD256 x) mask)
+       // match: (VPBLENDVB256 dst (VPMULLD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRSQRT14PD256 {
+               if v_1.Op != OpAMD64VPMULLD256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VRSQRT14PDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPMULLDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPADDSB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPMULLQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDSB256 {
+               if v_1.Op != OpAMD64VPMULLQ256 {
                        break
                }
                y := v_1.Args[1]
@@ -46693,98 +47512,95 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDSBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPMULLQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVZXWD512 x) mask)
+       // match: (VPBLENDVB256 dst (VPMULLW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXWD512 {
+               if v_1.Op != OpAMD64VPMULLW256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXWDMasked512Merging)
+               v.reset(OpAMD64VPMULLWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPROLQ256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPOPCNTB256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPROLQ256 {
+               if v_1.Op != OpAMD64VPOPCNTB256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPROLQMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPOPCNTBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPAVGB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPOPCNTD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPAVGB256 {
+               if v_1.Op != OpAMD64VPOPCNTD256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPAVGBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPOPCNTDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPRORVQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPOPCNTQ256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORVQ256 {
+               if v_1.Op != OpAMD64VPOPCNTQ256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPRORVQMasked256Merging)
+               v.reset(OpAMD64VPOPCNTQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask)
+       // match: (VPBLENDVB256 dst (VPOPCNTW256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXDQ512 {
+               if v_1.Op != OpAMD64VPOPCNTW256 {
                        break
                }
                x := v_1.Args[0]
@@ -46792,38 +47608,39 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXDQMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPOPCNTWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMINUB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPROLD256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUB256 {
+               if v_1.Op != OpAMD64VPROLD256 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINUBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPROLDMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
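        // NOTE: immediate-form ops ([a] in the match pattern) additionally
        // carry the rotate or shift amount through the rewrite: it is read
        // with a := auxIntToUint8(v_1.AuxInt) and written back onto the masked
        // replacement with v.AuxInt = uint8ToAuxInt(a) before the mask value
        // is attached, as in the VPROLD256 rule above.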
-       // match: (VPBLENDVB256 dst (VPSLLW256const [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPROLQ256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLW256const {
+               if v_1.Op != OpAMD64VPROLQ256 {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
@@ -46832,19 +47649,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLWMasked256constMerging)
+               v.reset(OpAMD64VPROLQMasked256Merging)
                v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VSCALEFPS256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPROLVD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSCALEFPS256 {
+               if v_1.Op != OpAMD64VPROLVD256 {
                        break
                }
                y := v_1.Args[1]
@@ -46853,99 +47670,100 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSCALEFPSMasked256Merging)
+               v.reset(OpAMD64VPROLVDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSLLQ256const [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPROLVQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLQ256const {
+               if v_1.Op != OpAMD64VPROLVQ256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLQMasked256constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPROLVQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMINSB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPRORD256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSB256 {
+               if v_1.Op != OpAMD64VPRORD256 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINSBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPRORDMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPABSQ256 x) mask)
+       // match: (VPBLENDVB256 dst (VPRORQ256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSQ256 {
+               if v_1.Op != OpAMD64VPRORQ256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPABSQMasked256Merging)
+               v.reset(OpAMD64VPRORQMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPRORVD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHUFHW256 {
+               if v_1.Op != OpAMD64VPRORVD256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHUFHWMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPRORVDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSUBB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPRORVQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBB256 {
+               if v_1.Op != OpAMD64VPRORVQ256 {
                        break
                }
                y := v_1.Args[1]
@@ -46954,238 +47772,254 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPRORVQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VMAXPS256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMAXPS256 {
+               if v_1.Op != OpAMD64VPSHLDD256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMAXPSMasked256Merging)
+               v.reset(OpAMD64VPSHLDDMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMAXSD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSD256 {
+               if v_1.Op != OpAMD64VPSHLDQ256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXSDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHLDQMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VMULPD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMULPD256 {
+               if v_1.Op != OpAMD64VPSHLDW256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMULPDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHLDWMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VDIVPS256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VDIVPS256 {
+               if v_1.Op != OpAMD64VPSHRDD256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VDIVPSMasked256Merging)
+               v.reset(OpAMD64VPSHRDDMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMAXSQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSQ256 {
+               if v_1.Op != OpAMD64VPSHRDQ256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXSQMasked256Merging)
+               v.reset(OpAMD64VPSHRDQMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VMINPD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMINPD256 {
+               if v_1.Op != OpAMD64VPSHRDW256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMINPDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHRDWMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask)
+       // match: (VPBLENDVB256 dst (VPSHUFB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHLDQ256 {
+               if v_1.Op != OpAMD64VPSHUFB256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHLDQMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHUFBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VSCALEFPD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSHUFD256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VSCALEFPD256 {
+               if v_1.Op != OpAMD64VPSHUFD256 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VSCALEFPDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSHUFDMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVSWB128 x) mask)
+       // match: (VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSWBMasked128Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSWB128 {
+               if v_1.Op != OpAMD64VPSHUFHW256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSWBMasked128Merging)
+               v.reset(OpAMD64VPSHUFHWMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMINSQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSLLD256const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINSQ256 {
+               if v_1.Op != OpAMD64VPSLLD256const {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINSQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSLLDMasked256constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPABSD256 x) mask)
+       // match: (VPBLENDVB256 dst (VPSLLQ256const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSD256 {
+               if v_1.Op != OpAMD64VPSLLQ256const {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPABSDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSLLQMasked256constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMINUW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSLLVD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUW256 {
+               if v_1.Op != OpAMD64VPSLLVD256 {
                        break
                }
                y := v_1.Args[1]
@@ -47194,121 +48028,121 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINUWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPSLLVDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask)
+       // match: (VPBLENDVB256 dst (VPSLLVQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHRDW256 {
+               if v_1.Op != OpAMD64VPSLLVQ256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHRDWMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPSLLVQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVZXBW512 x) mask)
+       // match: (VPBLENDVB256 dst (VPSLLVW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVZXBW512 {
+               if v_1.Op != OpAMD64VPSLLVW256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVZXBWMasked512Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPSLLVWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMAXUD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSLLW256const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUD256 {
+               if v_1.Op != OpAMD64VPSLLW256const {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXUDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSLLWMasked256constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMAXSB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXSB256 {
+               if v_1.Op != OpAMD64VPSRAD256const {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXSBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPSRADMasked256constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask)
+       // match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHRDQ256 {
+               if v_1.Op != OpAMD64VPSRAQ256const {
                        break
                }
                a := auxIntToUint8(v_1.AuxInt)
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHRDQMasked256Merging)
+               v.reset(OpAMD64VPSRAQMasked256constMerging)
                v.AuxInt = uint8ToAuxInt(a)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMADDUBSW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMADDUBSW256 {
+               if v_1.Op != OpAMD64VPSRAVD256 {
                        break
                }
                y := v_1.Args[1]
@@ -47317,39 +48151,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMADDUBSWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPSRAVDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSLLD256const [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPSRAVQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSLLD256const {
+               if v_1.Op != OpAMD64VPSRAVQ256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSLLDMasked256constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSRAVQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMINUQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSRAVW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMINUQ256 {
+               if v_1.Op != OpAMD64VPSRAVW256 {
                        break
                }
                y := v_1.Args[1]
@@ -47358,59 +48191,59 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMINUQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSRAVWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VRCP14PD256 x) mask)
+       // match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VRCP14PD256 {
+               if v_1.Op != OpAMD64VPSRAW256const {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VRCP14PDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSRAWMasked256constMerging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask)
+       // match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHRDD256 {
+               if v_1.Op != OpAMD64VPSRLVD256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
                y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHRDDMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
+               v.reset(OpAMD64VPSRLVDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPADDQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSRLVQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPADDQ256 {
+               if v_1.Op != OpAMD64VPSRLVQ256 {
                        break
                }
                y := v_1.Args[1]
@@ -47419,18 +48252,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPADDQMasked256Merging)
+               v.reset(OpAMD64VPSRLVQMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMAXUB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSRLVW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUB256 {
+               if v_1.Op != OpAMD64VPSRLVW256 {
                        break
                }
                y := v_1.Args[1]
@@ -47439,39 +48272,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXUBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPSRLVWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPRORQ256 [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPSUBB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPRORQ256 {
+               if v_1.Op != OpAMD64VPSUBB256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPRORQMasked256Merging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VADDPD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSUBD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VADDPD256 {
+               if v_1.Op != OpAMD64VPSUBD256 {
                        break
                }
                y := v_1.Args[1]
@@ -47480,18 +48312,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VADDPDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSHUFB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSUBQ256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSHUFB256 {
+               if v_1.Op != OpAMD64VPSUBQ256 {
                        break
                }
                y := v_1.Args[1]
@@ -47500,39 +48332,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSHUFBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBQMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPSUBSB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAD256const {
+               if v_1.Op != OpAMD64VPSUBSB256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRADMasked256constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBSBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSUBW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSUBSW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBW256 {
+               if v_1.Op != OpAMD64VPSUBSW256 {
                        break
                }
                y := v_1.Args[1]
@@ -47541,58 +48372,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBWMasked256Merging)
+               v.reset(OpAMD64VPSUBSWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask)
+       // match: (VPBLENDVB256 dst (VPSUBUSB256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAW256const {
+               if v_1.Op != OpAMD64VPSUBUSB256 {
                        break
                }
-               a := auxIntToUint8(v_1.AuxInt)
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAWMasked256constMerging)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBUSBMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPABSW256 x) mask)
+       // match: (VPBLENDVB256 dst (VPSUBUSW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSW256 {
+               if v_1.Op != OpAMD64VPSUBUSW256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPABSWMasked256Merging)
+               v.reset(OpAMD64VPSUBUSWMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPACKSSDW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VPSUBW256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPACKSSDW256 {
+               if v_1.Op != OpAMD64VPSUBW256 {
                        break
                }
                y := v_1.Args[1]
@@ -47601,18 +48432,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPACKSSDWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPSUBWMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVSQD128 x) mask)
+       // match: (VPBLENDVB256 dst (VRCP14PD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVSQDMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVSQD128 {
+               if v_1.Op != OpAMD64VRCP14PD256 {
                        break
                }
                x := v_1.Args[0]
@@ -47620,116 +48451,121 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVSQDMasked128Merging)
+               v.reset(OpAMD64VRCP14PDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPLZCNTD256 x) mask)
+       // match: (VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPLZCNTD256 {
+               if v_1.Op != OpAMD64VREDUCEPD256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPLZCNTDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VREDUCEPDMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VMAXPD256 x y) mask)
+       // match: (VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VMAXPD256 {
+               if v_1.Op != OpAMD64VREDUCEPS256 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VMAXPDMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VREDUCEPSMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPAVGW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPAVGW256 {
+               if v_1.Op != OpAMD64VRNDSCALEPD256 {
                        break
                }
-               y := v_1.Args[1]
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPAVGWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VRNDSCALEPDMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPOPCNTQ256 x) mask)
+       // match: (VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTQ256 {
+               if v_1.Op != OpAMD64VRNDSCALEPS256 {
                        break
                }
+               a := auxIntToUint8(v_1.AuxInt)
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPOPCNTQMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VRNDSCALEPSMasked256Merging)
+               v.AuxInt = uint8ToAuxInt(a)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSUBSW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VRSQRT14PD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBSW256 {
+               if v_1.Op != OpAMD64VRSQRT14PD256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBSWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VRSQRT14PDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMAXUW256 x y) mask)
+       // match: (VPBLENDVB256 dst (VSCALEFPD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMAXUW256 {
+               if v_1.Op != OpAMD64VSCALEFPD256 {
                        break
                }
                y := v_1.Args[1]
@@ -47738,57 +48574,57 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMAXUWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VSCALEFPDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPOPCNTW256 x) mask)
+       // match: (VPBLENDVB256 dst (VSCALEFPS256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // result: (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPOPCNTW256 {
+               if v_1.Op != OpAMD64VSCALEFPS256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPOPCNTWMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v.reset(OpAMD64VSCALEFPSMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSRAVQ256 x y) mask)
+       // match: (VPBLENDVB256 dst (VSQRTPD256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // result: (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSRAVQ256 {
+               if v_1.Op != OpAMD64VSQRTPD256 {
                        break
                }
-               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSRAVQMasked256Merging)
+               v.reset(OpAMD64VSQRTPDMasked256Merging)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg4(dst, x, y, v0)
+               v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPABSB256 x) mask)
+       // match: (VPBLENDVB256 dst (VSQRTPS256 x) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPABSB256 {
+               if v_1.Op != OpAMD64VSQRTPS256 {
                        break
                }
                x := v_1.Args[0]
@@ -47796,37 +48632,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPABSBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VSQRTPSMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg3(dst, x, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPMOVDW128 x) mask)
+       // match: (VPBLENDVB256 dst (VSUBPD256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPMOVDWMasked128Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // result: (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPMOVDW128 {
+               if v_1.Op != OpAMD64VSUBPD256 {
                        break
                }
+               y := v_1.Args[1]
                x := v_1.Args[0]
                mask := v_2
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPMOVDWMasked128Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VSUBPDMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(dst, x, v0)
+               v.AddArg4(dst, x, y, v0)
                return true
        }
-       // match: (VPBLENDVB256 dst (VPSUBSB256 x y) mask)
+       // match: (VPBLENDVB256 dst (VSUBPS256 x y) mask)
        // cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-       // result: (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // result: (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                dst := v_0
-               if v_1.Op != OpAMD64VPSUBSB256 {
+               if v_1.Op != OpAMD64VSUBPS256 {
                        break
                }
                y := v_1.Args[1]
@@ -47835,8 +48672,8 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
                if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
                        break
                }
-               v.reset(OpAMD64VPSUBSBMasked256Merging)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v.reset(OpAMD64VSUBPSMasked256Merging)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg4(dst, x, y, v0)
                return true
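Note: the rewriteAMD64.go hunks above are machine-generated, and each matcher corresponds to one rule in simdAMD64.rules; this diff mostly reflects the reordering produced by the sorted emission introduced below, plus the demoting-conversion ops that were split by input size. Reading one matcher's match/cond/result comments together, the VPSUBW256 case, for instance, encodes the rule

	(VPBLENDVB256 dst (VPSUBW256 x y) mask)
		&& v.Block.CPUfeatures.hasFeature(CPUavx512)
		=> (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))

that is, a variable byte blend of an unmasked result into dst is folded into the AVX-512 masked "merging" form of the same operation, with the vector mask first converted to a K-register mask.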
index 8dd1707da9f71ea8f587c4330763a2b345781654..19393add71abb858d13f447dd5cfd1f0f151efb1 100644 (file)
@@ -318,13 +318,25 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
                        case 128, 256:
                                // VPBLENDVB cases.
                                noMaskName := machineOpName(NoMask, gOp)
-                               maskedMergeOpts[noMaskName] = fmt.Sprintf("(VPBLENDVB%d dst (%s %s) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (%sMerging dst %s (VPMOVVec%dx%dToM <types.TypeMask> mask))\n",
+                               ruleExisting, ok := maskedMergeOpts[noMaskName]
+                               rule := fmt.Sprintf("(VPBLENDVB%d dst (%s %s) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (%sMerging dst %s (VPMOVVec%dx%dToM <types.TypeMask> mask))\n",
                                        *maskElem.Bits, noMaskName, data.Args, data.Asm, data.Args, *maskElem.ElemBits, *maskElem.Lanes)
+                               if ok && ruleExisting != rule {
+                                       panic("multiple masked merge rules for one op")
+                               } else {
+                                       maskedMergeOpts[noMaskName] = rule
+                               }
                        case 512:
                                // VPBLENDM[BWDQ] cases.
                                noMaskName := machineOpName(NoMask, gOp)
-                               maskedMergeOpts[noMaskName] = fmt.Sprintf("(VPBLENDM%sMasked%d dst (%s %s) mask) => (%sMerging dst %s mask)\n",
+                               ruleExisting, ok := maskedMergeOpts[noMaskName]
+                               rule := fmt.Sprintf("(VPBLENDM%sMasked%d dst (%s %s) mask) => (%sMerging dst %s mask)\n",
                                        s2n[*maskElem.ElemBits], *maskElem.Bits, noMaskName, data.Args, data.Asm, data.Args)
+                               if ok && ruleExisting != rule {
+                                       panic("multiple masked merge rules for one op")
+                               } else {
+                                       maskedMergeOpts[noMaskName] = rule
+                               }
                        }
                }
 
@@ -362,10 +374,15 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
                }
        }
 
+       maskedMergeOptsRules := []string{}
        for asm, rule := range maskedMergeOpts {
                if !asmCheck[asm] {
                        continue
                }
+               maskedMergeOptsRules = append(maskedMergeOptsRules, rule)
+       }
+       slices.Sort(maskedMergeOptsRules)
+       for _, rule := range maskedMergeOptsRules {
                buffer.WriteString(rule)
        }
 
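The sort introduced above is the heart of the "unstable output" fix: maskedMergeOpts is a Go map, and Go randomizes map iteration order between runs, so writing each rule straight out of the range loop emitted the rules file in a different order on every regeneration (the reshuffled rewriteAMD64.go hunks above are the visible symptom). Collecting the surviving rules into a slice and sorting makes the generator deterministic. A minimal standalone sketch of the same pattern, with made-up rule strings:

	package main

	import (
		"fmt"
		"slices"
	)

	func main() {
		rules := map[string]string{
			"VPSUBW256": "(VPBLENDVB256 dst (VPSUBW256 x y) mask) => ...",
			"VPADDQ256": "(VPBLENDVB256 dst (VPADDQ256 x y) mask) => ...",
		}
		// Ranging over the map directly would print these in a
		// nondeterministic order; collect, sort, then emit.
		sorted := make([]string, 0, len(rules))
		for _, rule := range rules {
			sorted = append(sorted, rule)
		}
		slices.Sort(sorted)
		for _, rule := range sorted {
			fmt.Println(rule)
		}
	}

The duplicate check in the first hunk guards the same invariant from the other side: if two operations ever map to the same machine-op name with different rules, the generator now panics instead of silently keeping whichever rule the map iteration happened to visit last.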
index f42251c5c31bdaa5bdf5372d892ce8374ad7fd09..7d3943b4b841b3cacbc5f7e39c6adf11afe9a540 100644 (file)
@@ -133,6 +133,25 @@ func (o *Operation) VectorWidth() int {
        panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o))
 }
 
+// Right now simdgen computes the machine op name for most instructions
+// as $Name$OutputSize; under that scheme, some instructions are "overloaded".
+// For example:
+//	(Uint16x8) ConvertToInt8
+//	(Uint16x16) ConvertToInt8
+// are both VPMOVWB128.
+// To make them distinguishable we also append the input size to the name.
+// TODO: document these names well in the generated code.
+var demotingConvertOps = map[string]bool{
+       "VPMOVQD128": true, "VPMOVSQD128": true, "VPMOVUSQD128": true, "VPMOVQW128": true, "VPMOVSQW128": true,
+       "VPMOVUSQW128": true, "VPMOVDW128": true, "VPMOVSDW128": true, "VPMOVUSDW128": true, "VPMOVQB128": true,
+       "VPMOVSQB128": true, "VPMOVUSQB128": true, "VPMOVDB128": true, "VPMOVSDB128": true, "VPMOVUSDB128": true,
+       "VPMOVWB128": true, "VPMOVSWB128": true, "VPMOVUSWB128": true,
+       "VPMOVQDMasked128": true, "VPMOVSQDMasked128": true, "VPMOVUSQDMasked128": true, "VPMOVQWMasked128": true, "VPMOVSQWMasked128": true,
+       "VPMOVUSQWMasked128": true, "VPMOVDWMasked128": true, "VPMOVSDWMasked128": true, "VPMOVUSDWMasked128": true, "VPMOVQBMasked128": true,
+       "VPMOVSQBMasked128": true, "VPMOVUSQBMasked128": true, "VPMOVDBMasked128": true, "VPMOVSDBMasked128": true, "VPMOVUSDBMasked128": true,
+       "VPMOVWBMasked128": true, "VPMOVSWBMasked128": true, "VPMOVUSWBMasked128": true,
+}
+
 func machineOpName(maskType maskShape, gOp Operation) string {
        asm := gOp.Asm
        if maskType == OneMask {
@@ -142,6 +161,11 @@ func machineOpName(maskType maskShape, gOp Operation) string {
        if gOp.SSAVariant != nil {
                asm += *gOp.SSAVariant
        }
+       if demotingConvertOps[asm] {
+               // Need to append the size of the source as well.
+               // TODO: should be "%sto%d".
+               asm = fmt.Sprintf("%s_%d", asm, *gOp.In[0].Bits)
+       }
        return asm
 }
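For illustration, a minimal standalone sketch of the disambiguation step in machineOpName; the op names are real, but the simplified opName helper and its inputBits parameter are hypothetical stand-ins for simdgen's Operation plumbing:

	package main

	import "fmt"

	// demotingConvert128 mirrors two entries of demotingConvertOps above.
	var demotingConvert128 = map[string]bool{
		"VPMOVWB128": true,
		"VPMOVDB128": true,
	}

	// opName appends the input width for overloaded demoting
	// conversions, so ops that share an output size but differ in
	// input size get distinct SSA op names.
	func opName(asm string, inputBits int) string {
		if demotingConvert128[asm] {
			return fmt.Sprintf("%s_%d", asm, inputBits)
		}
		return asm
	}

	func main() {
		fmt.Println(opName("VPMOVWB128", 128)) // VPMOVWB128_128
		fmt.Println(opName("VPMOVWB128", 256)) // VPMOVWB128_256
	}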