From e4d94842207a7f29fb473ecece2acdc5a2a207f7 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Thu, 13 Nov 2025 17:07:16 +0000 Subject: [PATCH] [dev.simd] cmd/compile: fix unstable output This CL fixed an error left by CL 718160. Change-Id: I442ea59bc1ff0dda2914d1858dd5ebe93e2818dc Reviewed-on: https://go-review.googlesource.com/c/go/+/720281 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Cherry Mui --- src/cmd/compile/internal/amd64/simdssa.go | 252 +- .../compile/internal/ssa/_gen/simdAMD64.rules | 976 +-- .../compile/internal/ssa/_gen/simdAMD64ops.go | 189 +- src/cmd/compile/internal/ssa/opGen.go | 1458 ++++- src/cmd/compile/internal/ssa/rewriteAMD64.go | 5605 ++++++++++------- src/simd/_gen/simdgen/gen_simdrules.go | 21 +- src/simd/_gen/simdgen/godefs.go | 24 + 7 files changed, 5444 insertions(+), 3081 deletions(-) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 0abcd95e37..9425b42d41 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -42,22 +42,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPBROADCASTW512, ssa.OpAMD64VPBROADCASTD512, ssa.OpAMD64VPBROADCASTQ512, - ssa.OpAMD64VPMOVWB128, + ssa.OpAMD64VPMOVWB128_128, + ssa.OpAMD64VPMOVWB128_256, ssa.OpAMD64VPMOVWB256, - ssa.OpAMD64VPMOVDB128, - ssa.OpAMD64VPMOVQB128, - ssa.OpAMD64VPMOVSWB128, + ssa.OpAMD64VPMOVDB128_128, + ssa.OpAMD64VPMOVDB128_256, + ssa.OpAMD64VPMOVDB128_512, + ssa.OpAMD64VPMOVQB128_128, + ssa.OpAMD64VPMOVQB128_256, + ssa.OpAMD64VPMOVQB128_512, + ssa.OpAMD64VPMOVSWB128_128, + ssa.OpAMD64VPMOVSWB128_256, ssa.OpAMD64VPMOVSWB256, - ssa.OpAMD64VPMOVSDB128, - ssa.OpAMD64VPMOVSQB128, + ssa.OpAMD64VPMOVSDB128_128, + ssa.OpAMD64VPMOVSDB128_256, + ssa.OpAMD64VPMOVSDB128_512, + ssa.OpAMD64VPMOVSQB128_128, + ssa.OpAMD64VPMOVSQB128_256, + ssa.OpAMD64VPMOVSQB128_512, ssa.OpAMD64VPMOVSXBW256, ssa.OpAMD64VPMOVSXBW512, - ssa.OpAMD64VPMOVDW128, + ssa.OpAMD64VPMOVDW128_128, + ssa.OpAMD64VPMOVDW128_256, ssa.OpAMD64VPMOVDW256, - ssa.OpAMD64VPMOVQW128, - ssa.OpAMD64VPMOVSDW128, + ssa.OpAMD64VPMOVQW128_128, + ssa.OpAMD64VPMOVQW128_256, + ssa.OpAMD64VPMOVQW128_512, + ssa.OpAMD64VPMOVSDW128_128, + ssa.OpAMD64VPMOVSDW128_256, ssa.OpAMD64VPMOVSDW256, - ssa.OpAMD64VPMOVSQW128, + ssa.OpAMD64VPMOVSQW128_128, + ssa.OpAMD64VPMOVSQW128_256, + ssa.OpAMD64VPMOVSQW128_512, ssa.OpAMD64VPMOVSXBW128, ssa.OpAMD64VCVTTPS2DQ128, ssa.OpAMD64VCVTTPS2DQ256, @@ -65,9 +81,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBD512, ssa.OpAMD64VPMOVSXWD256, ssa.OpAMD64VPMOVSXWD512, - ssa.OpAMD64VPMOVQD128, + ssa.OpAMD64VPMOVQD128_128, + ssa.OpAMD64VPMOVQD128_256, ssa.OpAMD64VPMOVQD256, - ssa.OpAMD64VPMOVSQD128, + ssa.OpAMD64VPMOVSQD128_128, + ssa.OpAMD64VPMOVSQD128_256, ssa.OpAMD64VPMOVSQD256, ssa.OpAMD64VPMOVSXBD128, ssa.OpAMD64VPMOVSXWD128, @@ -80,15 +98,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXDQ128, ssa.OpAMD64VPMOVSXBQ256, ssa.OpAMD64VPMOVSXBQ512, - ssa.OpAMD64VPMOVUSWB128, + ssa.OpAMD64VPMOVUSWB128_128, + ssa.OpAMD64VPMOVUSWB128_256, ssa.OpAMD64VPMOVUSWB256, - ssa.OpAMD64VPMOVUSDB128, - ssa.OpAMD64VPMOVUSQB128, + ssa.OpAMD64VPMOVUSDB128_128, + ssa.OpAMD64VPMOVUSDB128_256, + ssa.OpAMD64VPMOVUSDB128_512, + ssa.OpAMD64VPMOVUSQB128_128, + ssa.OpAMD64VPMOVUSQB128_256, + ssa.OpAMD64VPMOVUSQB128_512, ssa.OpAMD64VPMOVZXBW256, ssa.OpAMD64VPMOVZXBW512, - ssa.OpAMD64VPMOVUSDW128, + ssa.OpAMD64VPMOVUSDW128_128, + ssa.OpAMD64VPMOVUSDW128_256, 
ssa.OpAMD64VPMOVUSDW256, - ssa.OpAMD64VPMOVUSQW128, + ssa.OpAMD64VPMOVUSQW128_128, + ssa.OpAMD64VPMOVUSQW128_256, + ssa.OpAMD64VPMOVUSQW128_512, ssa.OpAMD64VPMOVZXBW128, ssa.OpAMD64VCVTPS2UDQ128, ssa.OpAMD64VCVTPS2UDQ256, @@ -96,7 +122,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVZXBD512, ssa.OpAMD64VPMOVZXWD256, ssa.OpAMD64VPMOVZXWD512, - ssa.OpAMD64VPMOVUSQD128, + ssa.OpAMD64VPMOVUSQD128_128, + ssa.OpAMD64VPMOVUSQD128_256, ssa.OpAMD64VPMOVUSQD256, ssa.OpAMD64VPMOVZXBD128, ssa.OpAMD64VPMOVZXWD128, @@ -791,22 +818,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, - ssa.OpAMD64VPMOVWBMasked128, + ssa.OpAMD64VPMOVWBMasked128_128, + ssa.OpAMD64VPMOVWBMasked128_256, ssa.OpAMD64VPMOVWBMasked256, - ssa.OpAMD64VPMOVDBMasked128, - ssa.OpAMD64VPMOVQBMasked128, - ssa.OpAMD64VPMOVSWBMasked128, + ssa.OpAMD64VPMOVDBMasked128_128, + ssa.OpAMD64VPMOVDBMasked128_256, + ssa.OpAMD64VPMOVDBMasked128_512, + ssa.OpAMD64VPMOVQBMasked128_128, + ssa.OpAMD64VPMOVQBMasked128_256, + ssa.OpAMD64VPMOVQBMasked128_512, + ssa.OpAMD64VPMOVSWBMasked128_128, + ssa.OpAMD64VPMOVSWBMasked128_256, ssa.OpAMD64VPMOVSWBMasked256, - ssa.OpAMD64VPMOVSDBMasked128, - ssa.OpAMD64VPMOVSQBMasked128, + ssa.OpAMD64VPMOVSDBMasked128_128, + ssa.OpAMD64VPMOVSDBMasked128_256, + ssa.OpAMD64VPMOVSDBMasked128_512, + ssa.OpAMD64VPMOVSQBMasked128_128, + ssa.OpAMD64VPMOVSQBMasked128_256, + ssa.OpAMD64VPMOVSQBMasked128_512, ssa.OpAMD64VPMOVSXBWMasked256, ssa.OpAMD64VPMOVSXBWMasked512, - ssa.OpAMD64VPMOVDWMasked128, + ssa.OpAMD64VPMOVDWMasked128_128, + ssa.OpAMD64VPMOVDWMasked128_256, ssa.OpAMD64VPMOVDWMasked256, - ssa.OpAMD64VPMOVQWMasked128, - ssa.OpAMD64VPMOVSDWMasked128, + ssa.OpAMD64VPMOVQWMasked128_128, + ssa.OpAMD64VPMOVQWMasked128_256, + ssa.OpAMD64VPMOVQWMasked128_512, + ssa.OpAMD64VPMOVSDWMasked128_128, + ssa.OpAMD64VPMOVSDWMasked128_256, ssa.OpAMD64VPMOVSDWMasked256, - ssa.OpAMD64VPMOVSQWMasked128, + ssa.OpAMD64VPMOVSQWMasked128_128, + ssa.OpAMD64VPMOVSQWMasked128_256, + ssa.OpAMD64VPMOVSQWMasked128_512, ssa.OpAMD64VPMOVSXBWMasked128, ssa.OpAMD64VCVTTPS2DQMasked128, ssa.OpAMD64VCVTTPS2DQMasked256, @@ -814,9 +857,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBDMasked512, ssa.OpAMD64VPMOVSXWDMasked256, ssa.OpAMD64VPMOVSXWDMasked512, - ssa.OpAMD64VPMOVQDMasked128, + ssa.OpAMD64VPMOVQDMasked128_128, + ssa.OpAMD64VPMOVQDMasked128_256, ssa.OpAMD64VPMOVQDMasked256, - ssa.OpAMD64VPMOVSQDMasked128, + ssa.OpAMD64VPMOVSQDMasked128_128, + ssa.OpAMD64VPMOVSQDMasked128_256, ssa.OpAMD64VPMOVSQDMasked256, ssa.OpAMD64VPMOVSXBDMasked128, ssa.OpAMD64VPMOVSXWDMasked128, @@ -829,15 +874,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXDQMasked128, ssa.OpAMD64VPMOVSXBQMasked256, ssa.OpAMD64VPMOVSXBQMasked512, - ssa.OpAMD64VPMOVUSWBMasked128, + ssa.OpAMD64VPMOVUSWBMasked128_128, + ssa.OpAMD64VPMOVUSWBMasked128_256, ssa.OpAMD64VPMOVUSWBMasked256, - ssa.OpAMD64VPMOVUSDBMasked128, - ssa.OpAMD64VPMOVUSQBMasked128, + ssa.OpAMD64VPMOVUSDBMasked128_128, + ssa.OpAMD64VPMOVUSDBMasked128_256, + ssa.OpAMD64VPMOVUSDBMasked128_512, + ssa.OpAMD64VPMOVUSQBMasked128_128, + ssa.OpAMD64VPMOVUSQBMasked128_256, + ssa.OpAMD64VPMOVUSQBMasked128_512, ssa.OpAMD64VPMOVZXBWMasked256, ssa.OpAMD64VPMOVZXBWMasked512, - ssa.OpAMD64VPMOVUSDWMasked128, + ssa.OpAMD64VPMOVUSDWMasked128_128, + ssa.OpAMD64VPMOVUSDWMasked128_256, ssa.OpAMD64VPMOVUSDWMasked256, - 
ssa.OpAMD64VPMOVUSQWMasked128, + ssa.OpAMD64VPMOVUSQWMasked128_128, + ssa.OpAMD64VPMOVUSQWMasked128_256, + ssa.OpAMD64VPMOVUSQWMasked128_512, ssa.OpAMD64VPMOVZXBWMasked128, ssa.OpAMD64VCVTPS2UDQMasked128, ssa.OpAMD64VCVTPS2UDQMasked256, @@ -845,7 +898,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVZXBDMasked512, ssa.OpAMD64VPMOVZXWDMasked256, ssa.OpAMD64VPMOVZXWDMasked512, - ssa.OpAMD64VPMOVUSQDMasked128, + ssa.OpAMD64VPMOVUSQDMasked128_128, + ssa.OpAMD64VPMOVUSQDMasked128_256, ssa.OpAMD64VPMOVUSQDMasked256, ssa.OpAMD64VPMOVZXBDMasked128, ssa.OpAMD64VPMOVZXWDMasked128, @@ -2266,22 +2320,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPDMasked128Merging, ssa.OpAMD64VREDUCEPDMasked256Merging, ssa.OpAMD64VREDUCEPDMasked512Merging, - ssa.OpAMD64VPMOVWBMasked128Merging, + ssa.OpAMD64VPMOVWBMasked128_128Merging, + ssa.OpAMD64VPMOVWBMasked128_256Merging, ssa.OpAMD64VPMOVWBMasked256Merging, - ssa.OpAMD64VPMOVDBMasked128Merging, - ssa.OpAMD64VPMOVQBMasked128Merging, - ssa.OpAMD64VPMOVSWBMasked128Merging, + ssa.OpAMD64VPMOVDBMasked128_128Merging, + ssa.OpAMD64VPMOVDBMasked128_256Merging, + ssa.OpAMD64VPMOVDBMasked128_512Merging, + ssa.OpAMD64VPMOVQBMasked128_128Merging, + ssa.OpAMD64VPMOVQBMasked128_256Merging, + ssa.OpAMD64VPMOVQBMasked128_512Merging, + ssa.OpAMD64VPMOVSWBMasked128_128Merging, + ssa.OpAMD64VPMOVSWBMasked128_256Merging, ssa.OpAMD64VPMOVSWBMasked256Merging, - ssa.OpAMD64VPMOVSDBMasked128Merging, - ssa.OpAMD64VPMOVSQBMasked128Merging, + ssa.OpAMD64VPMOVSDBMasked128_128Merging, + ssa.OpAMD64VPMOVSDBMasked128_256Merging, + ssa.OpAMD64VPMOVSDBMasked128_512Merging, + ssa.OpAMD64VPMOVSQBMasked128_128Merging, + ssa.OpAMD64VPMOVSQBMasked128_256Merging, + ssa.OpAMD64VPMOVSQBMasked128_512Merging, ssa.OpAMD64VPMOVSXBWMasked256Merging, ssa.OpAMD64VPMOVSXBWMasked512Merging, - ssa.OpAMD64VPMOVDWMasked128Merging, + ssa.OpAMD64VPMOVDWMasked128_128Merging, + ssa.OpAMD64VPMOVDWMasked128_256Merging, ssa.OpAMD64VPMOVDWMasked256Merging, - ssa.OpAMD64VPMOVQWMasked128Merging, - ssa.OpAMD64VPMOVSDWMasked128Merging, + ssa.OpAMD64VPMOVQWMasked128_128Merging, + ssa.OpAMD64VPMOVQWMasked128_256Merging, + ssa.OpAMD64VPMOVQWMasked128_512Merging, + ssa.OpAMD64VPMOVSDWMasked128_128Merging, + ssa.OpAMD64VPMOVSDWMasked128_256Merging, ssa.OpAMD64VPMOVSDWMasked256Merging, - ssa.OpAMD64VPMOVSQWMasked128Merging, + ssa.OpAMD64VPMOVSQWMasked128_128Merging, + ssa.OpAMD64VPMOVSQWMasked128_256Merging, + ssa.OpAMD64VPMOVSQWMasked128_512Merging, ssa.OpAMD64VPMOVSXBWMasked128Merging, ssa.OpAMD64VCVTTPS2DQMasked128Merging, ssa.OpAMD64VCVTTPS2DQMasked256Merging, @@ -2289,9 +2359,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBDMasked512Merging, ssa.OpAMD64VPMOVSXWDMasked256Merging, ssa.OpAMD64VPMOVSXWDMasked512Merging, - ssa.OpAMD64VPMOVQDMasked128Merging, + ssa.OpAMD64VPMOVQDMasked128_128Merging, + ssa.OpAMD64VPMOVQDMasked128_256Merging, ssa.OpAMD64VPMOVQDMasked256Merging, - ssa.OpAMD64VPMOVSQDMasked128Merging, + ssa.OpAMD64VPMOVSQDMasked128_128Merging, + ssa.OpAMD64VPMOVSQDMasked128_256Merging, ssa.OpAMD64VPMOVSQDMasked256Merging, ssa.OpAMD64VPMOVSXBDMasked128Merging, ssa.OpAMD64VPMOVSXWDMasked128Merging, @@ -2304,15 +2376,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXDQMasked128Merging, ssa.OpAMD64VPMOVSXBQMasked256Merging, ssa.OpAMD64VPMOVSXBQMasked512Merging, - ssa.OpAMD64VPMOVUSWBMasked128Merging, + ssa.OpAMD64VPMOVUSWBMasked128_128Merging, + ssa.OpAMD64VPMOVUSWBMasked128_256Merging, 
ssa.OpAMD64VPMOVUSWBMasked256Merging, - ssa.OpAMD64VPMOVUSDBMasked128Merging, - ssa.OpAMD64VPMOVUSQBMasked128Merging, + ssa.OpAMD64VPMOVUSDBMasked128_128Merging, + ssa.OpAMD64VPMOVUSDBMasked128_256Merging, + ssa.OpAMD64VPMOVUSDBMasked128_512Merging, + ssa.OpAMD64VPMOVUSQBMasked128_128Merging, + ssa.OpAMD64VPMOVUSQBMasked128_256Merging, + ssa.OpAMD64VPMOVUSQBMasked128_512Merging, ssa.OpAMD64VPMOVZXBWMasked256Merging, ssa.OpAMD64VPMOVZXBWMasked512Merging, - ssa.OpAMD64VPMOVUSDWMasked128Merging, + ssa.OpAMD64VPMOVUSDWMasked128_128Merging, + ssa.OpAMD64VPMOVUSDWMasked128_256Merging, ssa.OpAMD64VPMOVUSDWMasked256Merging, - ssa.OpAMD64VPMOVUSQWMasked128Merging, + ssa.OpAMD64VPMOVUSQWMasked128_128Merging, + ssa.OpAMD64VPMOVUSQWMasked128_256Merging, + ssa.OpAMD64VPMOVUSQWMasked128_512Merging, ssa.OpAMD64VPMOVZXBWMasked128Merging, ssa.OpAMD64VCVTPS2UDQMasked128Merging, ssa.OpAMD64VCVTPS2UDQMasked256Merging, @@ -2320,7 +2400,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVZXBDMasked512Merging, ssa.OpAMD64VPMOVZXWDMasked256Merging, ssa.OpAMD64VPMOVZXWDMasked512Merging, - ssa.OpAMD64VPMOVUSQDMasked128Merging, + ssa.OpAMD64VPMOVUSQDMasked128_128Merging, + ssa.OpAMD64VPMOVUSQDMasked128_256Merging, ssa.OpAMD64VPMOVUSQDMasked256Merging, ssa.OpAMD64VPMOVZXBDMasked128Merging, ssa.OpAMD64VPMOVZXWDMasked128Merging, @@ -2592,22 +2673,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCOMPRESSQMasked128, ssa.OpAMD64VPCOMPRESSQMasked256, ssa.OpAMD64VPCOMPRESSQMasked512, - ssa.OpAMD64VPMOVWBMasked128, + ssa.OpAMD64VPMOVWBMasked128_128, + ssa.OpAMD64VPMOVWBMasked128_256, ssa.OpAMD64VPMOVWBMasked256, - ssa.OpAMD64VPMOVDBMasked128, - ssa.OpAMD64VPMOVQBMasked128, - ssa.OpAMD64VPMOVSWBMasked128, + ssa.OpAMD64VPMOVDBMasked128_128, + ssa.OpAMD64VPMOVDBMasked128_256, + ssa.OpAMD64VPMOVDBMasked128_512, + ssa.OpAMD64VPMOVQBMasked128_128, + ssa.OpAMD64VPMOVQBMasked128_256, + ssa.OpAMD64VPMOVQBMasked128_512, + ssa.OpAMD64VPMOVSWBMasked128_128, + ssa.OpAMD64VPMOVSWBMasked128_256, ssa.OpAMD64VPMOVSWBMasked256, - ssa.OpAMD64VPMOVSDBMasked128, - ssa.OpAMD64VPMOVSQBMasked128, + ssa.OpAMD64VPMOVSDBMasked128_128, + ssa.OpAMD64VPMOVSDBMasked128_256, + ssa.OpAMD64VPMOVSDBMasked128_512, + ssa.OpAMD64VPMOVSQBMasked128_128, + ssa.OpAMD64VPMOVSQBMasked128_256, + ssa.OpAMD64VPMOVSQBMasked128_512, ssa.OpAMD64VPMOVSXBWMasked256, ssa.OpAMD64VPMOVSXBWMasked512, - ssa.OpAMD64VPMOVDWMasked128, + ssa.OpAMD64VPMOVDWMasked128_128, + ssa.OpAMD64VPMOVDWMasked128_256, ssa.OpAMD64VPMOVDWMasked256, - ssa.OpAMD64VPMOVQWMasked128, - ssa.OpAMD64VPMOVSDWMasked128, + ssa.OpAMD64VPMOVQWMasked128_128, + ssa.OpAMD64VPMOVQWMasked128_256, + ssa.OpAMD64VPMOVQWMasked128_512, + ssa.OpAMD64VPMOVSDWMasked128_128, + ssa.OpAMD64VPMOVSDWMasked128_256, ssa.OpAMD64VPMOVSDWMasked256, - ssa.OpAMD64VPMOVSQWMasked128, + ssa.OpAMD64VPMOVSQWMasked128_128, + ssa.OpAMD64VPMOVSQWMasked128_256, + ssa.OpAMD64VPMOVSQWMasked128_512, ssa.OpAMD64VPACKSSDWMasked128, ssa.OpAMD64VPACKSSDWMasked128load, ssa.OpAMD64VPACKSSDWMasked256, @@ -2624,9 +2721,11 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXBDMasked512, ssa.OpAMD64VPMOVSXWDMasked256, ssa.OpAMD64VPMOVSXWDMasked512, - ssa.OpAMD64VPMOVQDMasked128, + ssa.OpAMD64VPMOVQDMasked128_128, + ssa.OpAMD64VPMOVQDMasked128_256, ssa.OpAMD64VPMOVQDMasked256, - ssa.OpAMD64VPMOVSQDMasked128, + ssa.OpAMD64VPMOVSQDMasked128_128, + ssa.OpAMD64VPMOVSQDMasked128_256, ssa.OpAMD64VPMOVSQDMasked256, ssa.OpAMD64VPMOVSXBDMasked128, ssa.OpAMD64VPMOVSXWDMasked128, @@ 
-2639,15 +2738,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVSXDQMasked128, ssa.OpAMD64VPMOVSXBQMasked256, ssa.OpAMD64VPMOVSXBQMasked512, - ssa.OpAMD64VPMOVUSWBMasked128, + ssa.OpAMD64VPMOVUSWBMasked128_128, + ssa.OpAMD64VPMOVUSWBMasked128_256, ssa.OpAMD64VPMOVUSWBMasked256, - ssa.OpAMD64VPMOVUSDBMasked128, - ssa.OpAMD64VPMOVUSQBMasked128, + ssa.OpAMD64VPMOVUSDBMasked128_128, + ssa.OpAMD64VPMOVUSDBMasked128_256, + ssa.OpAMD64VPMOVUSDBMasked128_512, + ssa.OpAMD64VPMOVUSQBMasked128_128, + ssa.OpAMD64VPMOVUSQBMasked128_256, + ssa.OpAMD64VPMOVUSQBMasked128_512, ssa.OpAMD64VPMOVZXBWMasked256, ssa.OpAMD64VPMOVZXBWMasked512, - ssa.OpAMD64VPMOVUSDWMasked128, + ssa.OpAMD64VPMOVUSDWMasked128_128, + ssa.OpAMD64VPMOVUSDWMasked128_256, ssa.OpAMD64VPMOVUSDWMasked256, - ssa.OpAMD64VPMOVUSQWMasked128, + ssa.OpAMD64VPMOVUSQWMasked128_128, + ssa.OpAMD64VPMOVUSQWMasked128_256, + ssa.OpAMD64VPMOVUSQWMasked128_512, ssa.OpAMD64VPACKUSDWMasked128, ssa.OpAMD64VPACKUSDWMasked128load, ssa.OpAMD64VPACKUSDWMasked256, @@ -2664,7 +2771,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPMOVZXBDMasked512, ssa.OpAMD64VPMOVZXWDMasked256, ssa.OpAMD64VPMOVZXWDMasked512, - ssa.OpAMD64VPMOVUSQDMasked128, + ssa.OpAMD64VPMOVUSQDMasked128_128, + ssa.OpAMD64VPMOVUSQDMasked128_256, ssa.OpAMD64VPMOVUSQDMasked256, ssa.OpAMD64VPMOVZXBDMasked128, ssa.OpAMD64VPMOVZXWDMasked128, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 8332af2738..7ba970ca42 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -218,38 +218,38 @@ (CompressUint64x2 x mask) => (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM mask)) (CompressUint64x4 x mask) => (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM mask)) (CompressUint64x8 x mask) => (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM mask)) -(ConvertToInt8Int16x8 ...) => (VPMOVWB128 ...) -(ConvertToInt8Int16x16 ...) => (VPMOVWB128 ...) +(ConvertToInt8Int16x8 ...) => (VPMOVWB128_128 ...) +(ConvertToInt8Int16x16 ...) => (VPMOVWB128_256 ...) (ConvertToInt8Int16x32 ...) => (VPMOVWB256 ...) -(ConvertToInt8Int32x4 ...) => (VPMOVDB128 ...) -(ConvertToInt8Int32x8 ...) => (VPMOVDB128 ...) -(ConvertToInt8Int32x16 ...) => (VPMOVDB128 ...) -(ConvertToInt8Int64x2 ...) => (VPMOVQB128 ...) -(ConvertToInt8Int64x4 ...) => (VPMOVQB128 ...) -(ConvertToInt8Int64x8 ...) => (VPMOVQB128 ...) -(ConvertToInt8SaturatedInt16x8 ...) => (VPMOVSWB128 ...) -(ConvertToInt8SaturatedInt16x16 ...) => (VPMOVSWB128 ...) +(ConvertToInt8Int32x4 ...) => (VPMOVDB128_128 ...) +(ConvertToInt8Int32x8 ...) => (VPMOVDB128_256 ...) +(ConvertToInt8Int32x16 ...) => (VPMOVDB128_512 ...) +(ConvertToInt8Int64x2 ...) => (VPMOVQB128_128 ...) +(ConvertToInt8Int64x4 ...) => (VPMOVQB128_256 ...) +(ConvertToInt8Int64x8 ...) => (VPMOVQB128_512 ...) +(ConvertToInt8SaturatedInt16x8 ...) => (VPMOVSWB128_128 ...) +(ConvertToInt8SaturatedInt16x16 ...) => (VPMOVSWB128_256 ...) (ConvertToInt8SaturatedInt16x32 ...) => (VPMOVSWB256 ...) -(ConvertToInt8SaturatedInt32x4 ...) => (VPMOVSDB128 ...) -(ConvertToInt8SaturatedInt32x8 ...) => (VPMOVSDB128 ...) -(ConvertToInt8SaturatedInt32x16 ...) => (VPMOVSDB128 ...) -(ConvertToInt8SaturatedInt64x2 ...) => (VPMOVSQB128 ...) -(ConvertToInt8SaturatedInt64x4 ...) => (VPMOVSQB128 ...) -(ConvertToInt8SaturatedInt64x8 ...) => (VPMOVSQB128 ...) +(ConvertToInt8SaturatedInt32x4 ...) => (VPMOVSDB128_128 ...) +(ConvertToInt8SaturatedInt32x8 ...) => (VPMOVSDB128_256 ...) 
+(ConvertToInt8SaturatedInt32x16 ...) => (VPMOVSDB128_512 ...) +(ConvertToInt8SaturatedInt64x2 ...) => (VPMOVSQB128_128 ...) +(ConvertToInt8SaturatedInt64x4 ...) => (VPMOVSQB128_256 ...) +(ConvertToInt8SaturatedInt64x8 ...) => (VPMOVSQB128_512 ...) (ConvertToInt16Int8x16 ...) => (VPMOVSXBW256 ...) (ConvertToInt16Int8x32 ...) => (VPMOVSXBW512 ...) -(ConvertToInt16Int32x4 ...) => (VPMOVDW128 ...) -(ConvertToInt16Int32x8 ...) => (VPMOVDW128 ...) +(ConvertToInt16Int32x4 ...) => (VPMOVDW128_128 ...) +(ConvertToInt16Int32x8 ...) => (VPMOVDW128_256 ...) (ConvertToInt16Int32x16 ...) => (VPMOVDW256 ...) -(ConvertToInt16Int64x2 ...) => (VPMOVQW128 ...) -(ConvertToInt16Int64x4 ...) => (VPMOVQW128 ...) -(ConvertToInt16Int64x8 ...) => (VPMOVQW128 ...) -(ConvertToInt16SaturatedInt32x4 ...) => (VPMOVSDW128 ...) -(ConvertToInt16SaturatedInt32x8 ...) => (VPMOVSDW128 ...) +(ConvertToInt16Int64x2 ...) => (VPMOVQW128_128 ...) +(ConvertToInt16Int64x4 ...) => (VPMOVQW128_256 ...) +(ConvertToInt16Int64x8 ...) => (VPMOVQW128_512 ...) +(ConvertToInt16SaturatedInt32x4 ...) => (VPMOVSDW128_128 ...) +(ConvertToInt16SaturatedInt32x8 ...) => (VPMOVSDW128_256 ...) (ConvertToInt16SaturatedInt32x16 ...) => (VPMOVSDW256 ...) -(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128 ...) -(ConvertToInt16SaturatedInt64x4 ...) => (VPMOVSQW128 ...) -(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128 ...) +(ConvertToInt16SaturatedInt64x2 ...) => (VPMOVSQW128_128 ...) +(ConvertToInt16SaturatedInt64x4 ...) => (VPMOVSQW128_256 ...) +(ConvertToInt16SaturatedInt64x8 ...) => (VPMOVSQW128_512 ...) (ConvertToInt16SaturatedPackedInt32x4 ...) => (VPACKSSDW128 ...) (ConvertToInt16SaturatedPackedInt32x8 ...) => (VPACKSSDW256 ...) (ConvertToInt16SaturatedPackedInt32x16 ...) => (VPACKSSDW512 ...) @@ -260,11 +260,11 @@ (ConvertToInt32Int8x16 ...) => (VPMOVSXBD512 ...) (ConvertToInt32Int16x8 ...) => (VPMOVSXWD256 ...) (ConvertToInt32Int16x16 ...) => (VPMOVSXWD512 ...) -(ConvertToInt32Int64x2 ...) => (VPMOVQD128 ...) -(ConvertToInt32Int64x4 ...) => (VPMOVQD128 ...) +(ConvertToInt32Int64x2 ...) => (VPMOVQD128_128 ...) +(ConvertToInt32Int64x4 ...) => (VPMOVQD128_256 ...) (ConvertToInt32Int64x8 ...) => (VPMOVQD256 ...) -(ConvertToInt32SaturatedInt64x2 ...) => (VPMOVSQD128 ...) -(ConvertToInt32SaturatedInt64x4 ...) => (VPMOVSQD128 ...) +(ConvertToInt32SaturatedInt64x2 ...) => (VPMOVSQD128_128 ...) +(ConvertToInt32SaturatedInt64x4 ...) => (VPMOVSQD128_256 ...) (ConvertToInt32SaturatedInt64x8 ...) => (VPMOVSQD256 ...) (ConvertToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...) (ConvertToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...) @@ -277,38 +277,38 @@ (ConvertToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...) (ConvertToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...) (ConvertToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...) -(ConvertToUint8Uint16x8 ...) => (VPMOVWB128 ...) -(ConvertToUint8Uint16x16 ...) => (VPMOVWB128 ...) +(ConvertToUint8Uint16x8 ...) => (VPMOVWB128_128 ...) +(ConvertToUint8Uint16x16 ...) => (VPMOVWB128_256 ...) (ConvertToUint8Uint16x32 ...) => (VPMOVWB256 ...) -(ConvertToUint8Uint32x4 ...) => (VPMOVDB128 ...) -(ConvertToUint8Uint32x8 ...) => (VPMOVDB128 ...) -(ConvertToUint8Uint32x16 ...) => (VPMOVDB128 ...) -(ConvertToUint8Uint64x2 ...) => (VPMOVQB128 ...) -(ConvertToUint8Uint64x4 ...) => (VPMOVQB128 ...) -(ConvertToUint8Uint64x8 ...) => (VPMOVQB128 ...) -(ConvertToUint8SaturatedUint16x8 ...) => (VPMOVUSWB128 ...) -(ConvertToUint8SaturatedUint16x16 ...) => (VPMOVUSWB128 ...) +(ConvertToUint8Uint32x4 ...) => (VPMOVDB128_128 ...) +(ConvertToUint8Uint32x8 ...) 
=> (VPMOVDB128_256 ...) +(ConvertToUint8Uint32x16 ...) => (VPMOVDB128_512 ...) +(ConvertToUint8Uint64x2 ...) => (VPMOVQB128_128 ...) +(ConvertToUint8Uint64x4 ...) => (VPMOVQB128_256 ...) +(ConvertToUint8Uint64x8 ...) => (VPMOVQB128_512 ...) +(ConvertToUint8SaturatedUint16x8 ...) => (VPMOVUSWB128_128 ...) +(ConvertToUint8SaturatedUint16x16 ...) => (VPMOVUSWB128_256 ...) (ConvertToUint8SaturatedUint16x32 ...) => (VPMOVUSWB256 ...) -(ConvertToUint8SaturatedUint32x4 ...) => (VPMOVUSDB128 ...) -(ConvertToUint8SaturatedUint32x8 ...) => (VPMOVUSDB128 ...) -(ConvertToUint8SaturatedUint32x16 ...) => (VPMOVUSDB128 ...) -(ConvertToUint8SaturatedUint64x2 ...) => (VPMOVUSQB128 ...) -(ConvertToUint8SaturatedUint64x4 ...) => (VPMOVUSQB128 ...) -(ConvertToUint8SaturatedUint64x8 ...) => (VPMOVUSQB128 ...) +(ConvertToUint8SaturatedUint32x4 ...) => (VPMOVUSDB128_128 ...) +(ConvertToUint8SaturatedUint32x8 ...) => (VPMOVUSDB128_256 ...) +(ConvertToUint8SaturatedUint32x16 ...) => (VPMOVUSDB128_512 ...) +(ConvertToUint8SaturatedUint64x2 ...) => (VPMOVUSQB128_128 ...) +(ConvertToUint8SaturatedUint64x4 ...) => (VPMOVUSQB128_256 ...) +(ConvertToUint8SaturatedUint64x8 ...) => (VPMOVUSQB128_512 ...) (ConvertToUint16Uint8x16 ...) => (VPMOVZXBW256 ...) (ConvertToUint16Uint8x32 ...) => (VPMOVZXBW512 ...) -(ConvertToUint16Uint32x4 ...) => (VPMOVDW128 ...) -(ConvertToUint16Uint32x8 ...) => (VPMOVDW128 ...) +(ConvertToUint16Uint32x4 ...) => (VPMOVDW128_128 ...) +(ConvertToUint16Uint32x8 ...) => (VPMOVDW128_256 ...) (ConvertToUint16Uint32x16 ...) => (VPMOVDW256 ...) -(ConvertToUint16Uint64x2 ...) => (VPMOVQW128 ...) -(ConvertToUint16Uint64x4 ...) => (VPMOVQW128 ...) -(ConvertToUint16Uint64x8 ...) => (VPMOVQW128 ...) -(ConvertToUint16SaturatedUint32x4 ...) => (VPMOVUSDW128 ...) -(ConvertToUint16SaturatedUint32x8 ...) => (VPMOVUSDW128 ...) +(ConvertToUint16Uint64x2 ...) => (VPMOVQW128_128 ...) +(ConvertToUint16Uint64x4 ...) => (VPMOVQW128_256 ...) +(ConvertToUint16Uint64x8 ...) => (VPMOVQW128_512 ...) +(ConvertToUint16SaturatedUint32x4 ...) => (VPMOVUSDW128_128 ...) +(ConvertToUint16SaturatedUint32x8 ...) => (VPMOVUSDW128_256 ...) (ConvertToUint16SaturatedUint32x16 ...) => (VPMOVUSDW256 ...) -(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128 ...) -(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128 ...) -(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128 ...) +(ConvertToUint16SaturatedUint64x2 ...) => (VPMOVUSQW128_128 ...) +(ConvertToUint16SaturatedUint64x4 ...) => (VPMOVUSQW128_256 ...) +(ConvertToUint16SaturatedUint64x8 ...) => (VPMOVUSQW128_512 ...) (ConvertToUint16SaturatedPackedUint32x4 ...) => (VPACKUSDW128 ...) (ConvertToUint16SaturatedPackedUint32x8 ...) => (VPACKUSDW256 ...) (ConvertToUint16SaturatedPackedUint32x16 ...) => (VPACKUSDW512 ...) @@ -319,11 +319,11 @@ (ConvertToUint32Uint8x16 ...) => (VPMOVZXBD512 ...) (ConvertToUint32Uint16x8 ...) => (VPMOVZXWD256 ...) (ConvertToUint32Uint16x16 ...) => (VPMOVZXWD512 ...) -(ConvertToUint32Uint64x2 ...) => (VPMOVQD128 ...) -(ConvertToUint32Uint64x4 ...) => (VPMOVQD128 ...) +(ConvertToUint32Uint64x2 ...) => (VPMOVQD128_128 ...) +(ConvertToUint32Uint64x4 ...) => (VPMOVQD128_256 ...) (ConvertToUint32Uint64x8 ...) => (VPMOVQD256 ...) -(ConvertToUint32SaturatedUint64x2 ...) => (VPMOVUSQD128 ...) -(ConvertToUint32SaturatedUint64x4 ...) => (VPMOVUSQD128 ...) +(ConvertToUint32SaturatedUint64x2 ...) => (VPMOVUSQD128_128 ...) +(ConvertToUint32SaturatedUint64x4 ...) => (VPMOVUSQD128_256 ...) (ConvertToUint32SaturatedUint64x8 ...) => (VPMOVUSQD256 ...) 
(ConvertToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...) (ConvertToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...) @@ -1423,22 +1423,38 @@ (VMOVDQU64Masked128 (VREDUCEPD128 [a] x) mask) => (VREDUCEPDMasked128 [a] x mask) (VMOVDQU64Masked256 (VREDUCEPD256 [a] x) mask) => (VREDUCEPDMasked256 [a] x mask) (VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512 [a] x mask) -(VMOVDQU16Masked128 (VPMOVWB128 x) mask) => (VPMOVWBMasked128 x mask) +(VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) => (VPMOVWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) => (VPMOVWBMasked128_256 x mask) (VMOVDQU16Masked256 (VPMOVWB256 x) mask) => (VPMOVWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVDB128 x) mask) => (VPMOVDBMasked128 x mask) -(VMOVDQU64Masked128 (VPMOVQB128 x) mask) => (VPMOVQBMasked128 x mask) -(VMOVDQU16Masked128 (VPMOVSWB128 x) mask) => (VPMOVSWBMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) => (VPMOVDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) => (VPMOVDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) => (VPMOVDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) => (VPMOVQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) => (VPMOVQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) => (VPMOVQBMasked128_512 x mask) +(VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) => (VPMOVSWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) => (VPMOVSWBMasked128_256 x mask) (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVSDB128 x) mask) => (VPMOVSDBMasked128 x mask) -(VMOVDQU64Masked128 (VPMOVSQB128 x) mask) => (VPMOVSQBMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) => (VPMOVSDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) => (VPMOVSDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) => (VPMOVSQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) => (VPMOVSQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512 x mask) (VMOVDQU8Masked256 (VPMOVSXBW256 x) mask) => (VPMOVSXBWMasked256 x mask) (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) => (VPMOVSXBWMasked512 x mask) -(VMOVDQU32Masked128 (VPMOVDW128 x) mask) => (VPMOVDWMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) => (VPMOVDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) => (VPMOVDWMasked128_256 x mask) (VMOVDQU32Masked256 (VPMOVDW256 x) mask) => (VPMOVDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVQW128 x) mask) => (VPMOVQWMasked128 x mask) -(VMOVDQU32Masked128 (VPMOVSDW128 x) mask) => (VPMOVSDWMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) => (VPMOVQWMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) => (VPMOVQWMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) => (VPMOVQWMasked128_512 x mask) +(VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) => (VPMOVSDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) => (VPMOVSDWMasked128_256 x mask) (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVSQW128 x) mask) => (VPMOVSQWMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) => (VPMOVSQWMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) => (VPMOVSQWMasked128_256 x mask) +(VMOVDQU64Masked512 
(VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512 x mask) (VMOVDQU32Masked128 (VPACKSSDW128 x y) mask) => (VPACKSSDWMasked128 x y mask) (VMOVDQU32Masked256 (VPACKSSDW256 x y) mask) => (VPACKSSDWMasked256 x y mask) (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512 x y mask) @@ -1449,9 +1465,11 @@ (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) => (VPMOVSXBDMasked512 x mask) (VMOVDQU16Masked256 (VPMOVSXWD256 x) mask) => (VPMOVSXWDMasked256 x mask) (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) => (VPMOVSXWDMasked512 x mask) -(VMOVDQU64Masked128 (VPMOVQD128 x) mask) => (VPMOVQDMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) => (VPMOVQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) => (VPMOVQDMasked128_256 x mask) (VMOVDQU64Masked256 (VPMOVQD256 x) mask) => (VPMOVQDMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVSQD128 x) mask) => (VPMOVSQDMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) => (VPMOVSQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) => (VPMOVSQDMasked128_256 x mask) (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256 x mask) (VMOVDQU8Masked128 (VPMOVSXBD128 x) mask) => (VPMOVSXBDMasked128 x mask) (VMOVDQU16Masked128 (VPMOVSXWD128 x) mask) => (VPMOVSXWDMasked128 x mask) @@ -1464,15 +1482,23 @@ (VMOVDQU32Masked128 (VPMOVSXDQ128 x) mask) => (VPMOVSXDQMasked128 x mask) (VMOVDQU8Masked256 (VPMOVSXBQ256 x) mask) => (VPMOVSXBQMasked256 x mask) (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) => (VPMOVSXBQMasked512 x mask) -(VMOVDQU16Masked128 (VPMOVUSWB128 x) mask) => (VPMOVUSWBMasked128 x mask) +(VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) => (VPMOVUSWBMasked128_128 x mask) +(VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) => (VPMOVUSWBMasked128_256 x mask) (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256 x mask) -(VMOVDQU32Masked128 (VPMOVUSDB128 x) mask) => (VPMOVUSDBMasked128 x mask) -(VMOVDQU64Masked128 (VPMOVUSQB128 x) mask) => (VPMOVUSQBMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) => (VPMOVUSDBMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) => (VPMOVUSDBMasked128_256 x mask) +(VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512 x mask) +(VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) => (VPMOVUSQBMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) => (VPMOVUSQBMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512 x mask) (VMOVDQU8Masked256 (VPMOVZXBW256 x) mask) => (VPMOVZXBWMasked256 x mask) (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) => (VPMOVZXBWMasked512 x mask) -(VMOVDQU32Masked128 (VPMOVUSDW128 x) mask) => (VPMOVUSDWMasked128 x mask) +(VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) => (VPMOVUSDWMasked128_128 x mask) +(VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) => (VPMOVUSDWMasked128_256 x mask) (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256 x mask) -(VMOVDQU64Masked128 (VPMOVUSQW128 x) mask) => (VPMOVUSQWMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) => (VPMOVUSQWMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) => (VPMOVUSQWMasked128_256 x mask) +(VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512 x mask) (VMOVDQU32Masked128 (VPACKUSDW128 x y) mask) => (VPACKUSDWMasked128 x y mask) (VMOVDQU32Masked256 (VPACKUSDW256 x y) mask) => (VPACKUSDWMasked256 x y mask) (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512 x y mask) @@ -1483,7 +1509,8 @@ 
(VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) => (VPMOVZXBDMasked512 x mask) (VMOVDQU16Masked256 (VPMOVZXWD256 x) mask) => (VPMOVZXWDMasked256 x mask) (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) => (VPMOVZXWDMasked512 x mask) -(VMOVDQU64Masked128 (VPMOVUSQD128 x) mask) => (VPMOVUSQDMasked128 x mask) +(VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) => (VPMOVUSQDMasked128_128 x mask) +(VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) => (VPMOVUSQDMasked128_256 x mask) (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256 x mask) (VMOVDQU8Masked128 (VPMOVZXBD128 x) mask) => (VPMOVZXBDMasked128 x mask) (VMOVDQU16Masked128 (VPMOVZXWD128 x) mask) => (VPMOVZXWDMasked128 x mask) @@ -1862,424 +1889,451 @@ (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask) => (VPSRAQMasked128const [a] x mask) (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask) => (VPSRAQMasked256const [a] x mask) (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512const [a] x mask) -(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask) -(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) -(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask) -(VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512Merging dst x mask) -(VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) => (VPMAXSWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMINUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) => (VPMULHWMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPROLQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VPMAXSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPADDSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VPADDUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDVB128 dst (VBROADCASTSS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPMOVSXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMINSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VMULPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) (VPBLENDMBMasked512 dst (VGF2P8MULB512 x y) mask) => (VGF2P8MULBMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) => (VMAXPSMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPOPCNTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM mask)) -(VPBLENDVB256 dst (VSUBPS256 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSUBUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDMDMasked512 dst (VPMOVUSDB128 x) mask) => (VPMOVUSDBMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPMAXUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) => (VRSQRT14PSMasked512Merging dst x mask) -(VPBLENDVB256 dst (VPROLD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) -(VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) => (VPROLQMasked512Merging dst [a] x mask) -(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPMOVUSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128Merging dst x (VPMOVVec16x16ToM mask)) -(VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VPMULLW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask) +(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) => (VPADDSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) => (VPADDUSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) => (VPAVGBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) => (VPMAXSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) => (VPMAXUBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) => (VPMINSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) => (VPMINUBMasked512Merging dst x y mask) (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) => (VPOPCNTBMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) +(VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) => (VPSHUFBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) => (VPSUBBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) => (VPSUBSBMasked512Merging dst x y mask) +(VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) => 
(VPSUBUSBMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VADDPS512 x y) mask) => (VADDPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) => (VDIVPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) => (VMAXPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VMINPS512 x y) mask) => (VMINPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VMULPS512 x y) mask) => (VMULPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPABSD512 x) mask) => (VPABSDMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPADDD512 x y) mask) => (VPADDDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) => (VPANDDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) => (VPLZCNTDMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) => (VPMAXSDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) => (VPMAXUDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) => (VPMINSDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) => (VPMINUDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPMOVDB128_512 x) mask) => (VPMOVDBMasked128_512Merging dst x mask) (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) => (VPMOVDWMasked256Merging dst x mask) -(VPBLENDMQMasked512 dst (VPMOVUSQB128 x) mask) => (VPMOVUSQBMasked128Merging dst x mask) -(VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMAXSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPMINSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB128 dst (VPOPCNTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) => (VPMOVSDBMasked128_512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) => (VPMOVSDWMasked256Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) => (VPMOVUSDBMasked128_512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask) +(VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) => (VPMULLDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VPORD512 x y) mask) => (VPORDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) => (VPROLDMasked512Merging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) => (VPROLVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) => (VPRORDMasked512Merging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) => (VPRORVDMasked512Merging dst 
x y mask) +(VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) => (VPSHLDDMasked512Merging dst [a] x y mask) +(VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) => (VPSHRDDMasked512Merging dst [a] x y mask) +(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask) +(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask) (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) => (VRCP14PSMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPBROADCASTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) => (VPMOVWBMasked256Merging dst x mask) -(VPBLENDVB128 dst (VPRORVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VPSUBUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) => (VREDUCEPSMasked512Merging dst [a] x mask) -(VPBLENDVB256 dst (VPMAXSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB256 dst (VMINPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) +(VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512Merging dst [a] x mask) +(VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) => (VRSQRT14PSMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512Merging dst x y mask) +(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => (VSQRTPSMasked512Merging dst x mask) +(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) => (VDIVPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) => (VMAXPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VMINPD512 x y) mask) => (VMINPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VMULPD512 x y) mask) => (VMULPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) => (VPABSQMasked512Merging dst x mask) (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) => (VPADDQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VBROADCASTSD256 x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) => (VPANDQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) => (VPMAXSQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) => (VPMAXUQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) => (VPMINSQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) => (VPMINUQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPMOVQB128_512 x) mask) => (VPMOVQBMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) => (VPMOVQDMasked256Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVQW128_512 x) mask) => (VPMOVQWMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) => (VPMOVSQBMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) => (VPMOVSQWMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) => (VPMOVUSQBMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) => (VPMOVUSQWMasked128_512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VPORQ512 x y) mask) => (VPORQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) => (VPROLQMasked512Merging dst [a] x mask) +(VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) => (VPROLVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) => (VPRORQMasked512Merging dst [a] x mask) +(VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) => (VPRORVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) => (VPSHLDQMasked512Merging dst [a] x y mask) +(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask) +(VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512constMerging dst [a] x mask) +(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask) +(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) => (VRCP14PDMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512Merging dst [a] x mask) (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) => (VRNDSCALEPDMasked512Merging dst [a] x mask) -(VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPMINSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst 
(VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPADDSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPACKUSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) => (VRSQRT14PDMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) => (VSCALEFPDMasked512Merging dst x y mask) +(VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) => (VSQRTPDMasked512Merging dst x mask) +(VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) => (VSUBPDMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPABSW512 x) mask) => (VPABSWMasked512Merging dst x mask) +(VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) => (VPADDSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) => (VPADDUSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPADDW512 x y) mask) => (VPADDWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) => (VPAVGWMasked512Merging dst x y mask) (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) => (VPMADDUBSWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPLZCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPMAXUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPOPCNTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VPROLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDMQMasked512 dst (VPABSQ512 x) mask) => (VPABSQMasked512Merging dst x mask) -(VPBLENDVB128 dst (VBROADCASTSD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VMINPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPMULHW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) +(VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) => (VPMADDWDMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) => (VPMAXSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) => (VPMAXUWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) => (VPMINSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) => (VPMINUWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256Merging dst x mask) +(VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256Merging dst x mask) +(VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) => (VPMOVWBMasked256Merging dst x mask) +(VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) => (VPMULHUWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) => (VPMULHWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) 
=> (VPMULLWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) => (VPOPCNTWMasked512Merging dst x mask) (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) => (VPSHLDWMasked512Merging dst [a] x y mask) -(VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask)) +(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask) +(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask) +(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask) +(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask) +(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask) +(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask) (VPBLENDVB128 dst (VADDPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VPMOVZXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) => (VDIVPSMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VDIVPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VPLZCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPSUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) -(VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) => (VPMOVUSQDMasked256Merging dst x mask) -(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) => (VPMULHUWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPRORQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) => (VPSUBSBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPADDD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPMOVSXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VPMOVSDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPMINSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VADDPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVSXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) => (VPMOVUSWBMasked256Merging dst x mask) -(VPBLENDVB256 dst (VPOPCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPROLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPADDUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB128 dst (VPMAXSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPMINUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMULLQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VSQRTPD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) -(VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) => (VPMINSWMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) => (VRCP14PDMasked512Merging dst x mask) -(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPBROADCASTD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM mask)) -(VPBLENDVB128 dst (VPMADDWD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDVB128 dst (VPROLD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB128 dst (VPSUBUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPADDUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMOVZXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPROLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VBROADCASTSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VBROADCASTSD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VBROADCASTSS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VBROADCASTSS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VDIVPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VDIVPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VMAXPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VMAXPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VMINPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VMINPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst 
(VMULPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VMULPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPABSB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPABSD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPABSQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPABSW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPACKSSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPACKUSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPADDB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) => (VPROLDMasked512Merging dst [a] x mask) -(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask) -(VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) => (VPSUBBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPADDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) +(VPBLENDVB128 dst (VPADDD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPADDQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPADDSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPADDSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPADDUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) (VPBLENDVB128 dst (VPADDUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPADDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPAVGB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPAVGW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) (VPBLENDVB128 dst (VPBROADCASTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPMINUW128 x y) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDMDMasked512 dst (VPORD512 x y) mask) => (VPORDMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPMINSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VPMULLD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPRORD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPRORVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) => (VPRORVQMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VMINPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) => (VPSHLDDMasked512Merging dst [a] x y mask) -(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VSUBPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDMWMasked512 dst (VPADDW512 x y) mask) => (VPADDWMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) => (VPANDQMasked512Merging dst x y mask) +(VPBLENDVB128 dst (VPBROADCASTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM mask)) (VPBLENDVB128 dst (VPBROADCASTB512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) => (VPACKUSDWMasked512Merging dst x y mask) -(VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) => (VPSHUFHWMasked512Merging dst [a] x mask) -(VPBLENDVB128 dst (VRCP14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) => (VPSHRDWMasked512Merging dst [a] x y mask) -(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask) 
-(VPBLENDVB128 dst (VPMOVSXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VPBROADCASTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPBROADCASTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPBROADCASTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDMQMasked512 dst (VPMOVQB128 x) mask) => (VPMOVQBMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPACKUSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) => (VPMINSBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPMULLD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPADDB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDMBMasked512 dst (VPADDB512 x y) mask) => (VPADDBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPADDD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPMOVWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128Merging dst x (VPMOVVec16x16ToM mask)) -(VPBLENDVB256 dst (VPMADDWD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) => (VPMAXSDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) => (VPSHLDQMasked512Merging dst [a] x y mask) -(VPBLENDVB128 dst (VBROADCASTSS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPMOVQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) => (VDIVPDMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VADDPS512 x y) mask) => (VADDPSMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVSXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) => (VPMOVUSDWMasked256Merging dst x mask) -(VPBLENDVB256 dst (VPMULHUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB256 dst (VPMULLQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VPROLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) => (VPROLVQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPMOVUSDW128 x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPMAXUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPMULLW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB256 dst (VPRORD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) -(VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) => (VPRORQMasked512Merging dst [a] x mask) -(VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB128 dst (VSUBPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPBROADCASTD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPMINUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPRORVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPMOVSDB128 x) mask) => (VPMOVSDBMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPMOVUSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) => (VPMAXUBMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) => (VPMINSQMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) => (VSQRTPDMasked512Merging dst x mask) -(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSUBUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) => (VPMAXUDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) => (VPMOVSQDMasked256Merging dst x mask) -(VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VRSQRT14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) => (VPRORDMasked512Merging dst [a] x mask) -(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPABSW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPADDSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDMBMasked512 dst (VPADDUSB512 x y) mask) => (VPADDUSBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPMOVZXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM mask)) -(VPBLENDMQMasked512 dst (VMINPD512 x y) mask) => (VMINPDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) => (VPMULLQMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) => (VPROLVDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSUBW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) => (VCVTTPS2DQMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) => (VPMADDWDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VPROLQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) -(VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) => (VPSLLVWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPABSD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPAVGB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDMBMasked512 dst (VPAVGB512 x y) mask) => (VPAVGBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPBROADCASTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VMAXPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) => (VPMINUBMasked512Merging dst x y mask) +(VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPBROADCASTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPBROADCASTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPBROADCASTW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPLZCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked128Merging dst x 
(VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPLZCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMADDWD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMAXSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMAXSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMAXSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMAXSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMAXUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMAXUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMAXUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMAXUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMINSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMINSD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMINSQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMINSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMINUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMINUD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPMINUQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VMULPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) => (VMAXPDMasked512Merging dst x y mask) -(VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) => (VPMAXSBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMULHUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VMULPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPRORVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging 
dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) => (VPACKSSDWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPMINUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) => (VPRORVDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VSCALEFPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) -(VPBLENDMWMasked512 dst (VPABSW512 x) mask) => (VPABSWMasked512Merging dst x mask) +(VPBLENDVB128 dst (VPMINUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVSXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVSXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVSXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VSCALEFPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPADDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDMQMasked512 dst (VMULPD512 x y) mask) => (VMULPDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPORQ512 x y) mask) => (VPORQMasked512Merging dst x y mask) +(VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVSXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVSXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVSXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVSXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPMOVSXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDMQMasked512 dst (VPMOVUSQW128 x) mask) => (VPMOVUSQWMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPMINSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDVB128 dst (VRSQRT14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPABSQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) => (VREDUCEPDMasked512Merging dst [a] x mask) -(VPBLENDVB128 dst (VPMULHW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM mask)) -(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask) -(VPBLENDMDMasked512 dst (VPADDD512 x y) mask) => (VPADDDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) => (VPOPCNTQMasked512Merging dst x mask) -(VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDVB128 dst (VPSUBSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDMBMasked512 dst 
(VPSUBUSB512 x y) mask) => (VPSUBUSBMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPADDSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) => (VPADDUSWMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VMAXPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPMAXSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPMINSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VMULPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512Merging dst [a] x mask) -(VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) => (VCVTPS2UDQMasked512Merging dst x mask) -(VPBLENDVB256 dst (VDIVPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPMAXSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VMINPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) -(VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) => (VPSHUFBMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VSCALEFPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) => (VPSHRDDMasked512Merging dst [a] x y mask) -(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VSQRTPD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPAVGW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPMOVSWB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128Merging dst x (VPMOVVec16x16ToM mask)) -(VPBLENDVB128 dst (VDIVPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB128 dst (VDIVPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked128Merging dst x 
y (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPMINSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) => (VPOPCNTWMasked512Merging dst x mask) +(VPBLENDVB128 dst (VPMOVSXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVSXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVWB128_128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVZXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVZXBD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVZXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVZXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVZXBW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVZXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVZXDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMOVZXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM mask)) 
+(VPBLENDVB128 dst (VPMOVZXWD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVZXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMULHUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMULHW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPMULLD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPMULLQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPMULLW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPOPCNTB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM mask)) (VPBLENDVB128 dst (VPOPCNTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) -(VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) => (VPOPCNTDMasked512Merging dst x mask) -(VPBLENDVB256 dst (VPABSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) -(VPBLENDMDMasked512 dst (VPMOVDB128 x) mask) => (VPMOVDBMasked128Merging dst x mask) -(VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPMINUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) => (VPMINUWMasked512Merging dst x y mask) (VPBLENDVB128 dst (VPOPCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) => (VPMOVQDMasked256Merging dst x mask) -(VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask)) -(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask) -(VPBLENDVB128 dst (VPAVGB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) -(VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) => (VPAVGWMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) -(VPBLENDVB128 dst (VPMAXSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPMAXUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) => (VPMAXUQMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VMINPS512 x y) mask) => (VMINPSMasked512Merging dst x y mask) -(VPBLENDMBMasked512 dst (VPABSB512 x) mask) => (VPABSBMasked512Merging dst x mask) -(VPBLENDMDMasked512 dst (VPANDD512 x y) mask) => (VPANDDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPMOVZXBW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMOVZXBD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VPMAXSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) => (VPSHUFDMasked512Merging dst [a] x mask) +(VPBLENDVB128 dst (VPOPCNTW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPROLD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPROLQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPROLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPROLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPRORD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPRORQ128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPRORVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPRORVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSHRDD128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 
dst (VPSHUFB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPSHUFD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) => (VPMINSDMasked512Merging dst x y mask) -(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask) -(VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) => (VSUBPDMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) -(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) -(VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) => (VPMOVSWBMasked256Merging dst x mask) -(VPBLENDMQMasked512 dst (VPMOVQW128 x) mask) => (VPMOVQWMasked128Merging dst x mask) -(VPBLENDVB256 dst (VPMINUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VRCP14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask)) -(VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) => (VPSHRDQMasked512Merging dst [a] x y mask) +(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) (VPBLENDVB128 dst (VPSLLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPSLLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPSLLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSLLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSRAD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y 
(VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) (VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDVB256 dst (VPADDQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) -(VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) => (VPLZCNTQMasked512Merging dst x mask) -(VPBLENDVB256 dst (VPMAXUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDVB256 dst (VPRORQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) -(VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) => (VSCALEFPDMasked512Merging dst x y mask) +(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) (VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) -(VPBLENDVB128 dst (VPSLLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VPSUBSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPSUBSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSUBUSB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) +(VPBLENDVB128 dst (VPSUBUSW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VPSUBW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) +(VPBLENDVB128 dst (VRCP14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) && 
v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VRSQRT14PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VSCALEFPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VSCALEFPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VSQRTPD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VSQRTPS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM mask)) +(VPBLENDVB128 dst (VSUBPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) +(VPBLENDVB128 dst (VSUBPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) (VPBLENDVB256 dst (VADDPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) -(VPBLENDMQMasked512 dst (VPMOVSQW128 x) mask) => (VPMOVSQWMasked128Merging dst x mask) -(VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) => (VPMAXUWMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) -(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) -(VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) -(VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) => (VPMINUQMasked512Merging dst x y mask) -(VPBLENDVB128 dst (VPSRAVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) -(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask) -(VPBLENDVB256 dst (VPSUBW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) -(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) +(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VDIVPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) +(VPBLENDVB256 dst (VDIVPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => 
(VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) +(VPBLENDVB256 dst (VMAXPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) +(VPBLENDVB256 dst (VMAXPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VMINPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) +(VPBLENDVB256 dst (VMINPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VMULPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) +(VPBLENDVB256 dst (VMULPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMULPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VPABSB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM mask)) +(VPBLENDVB256 dst (VPABSD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VPABSQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM mask)) (VPBLENDVB256 dst (VPABSW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM mask)) (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) -(VPBLENDVB256 dst (VPMOVSQD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128Merging dst x (VPMOVVec64x4ToM mask)) -(VPBLENDVB128 dst (VPMOVSXBD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) -(VPBLENDVB256 dst (VPLZCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask)) -(VPBLENDVB128 dst (VPLZCNTQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) -(VPBLENDVB256 dst (VMAXPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) +(VPBLENDVB256 dst (VPACKUSDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VPADDB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) +(VPBLENDVB256 dst (VPADDD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) +(VPBLENDVB256 dst (VPADDQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) +(VPBLENDVB256 dst (VPADDSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) +(VPBLENDVB256 dst (VPADDSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) +(VPBLENDVB256 dst (VPADDUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) 
+(VPBLENDVB256 dst (VPADDUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPADDW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPAVGB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 (VPBLENDVB256 dst (VPAVGW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
-(VPBLENDVB128 dst (VPACKSSDW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask))
-(VPBLENDVB128 dst (VPMOVZXWD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM mask))
-(VPBLENDVB256 dst (VPOPCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask))
-(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
-(VPBLENDVB256 dst (VPSUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
-(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask)
-(VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) => (VPADDSBMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPBROADCASTD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM mask))
-(VPBLENDVB128 dst (VMAXPS128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+(VPBLENDVB256 dst (VPLZCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPLZCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMADDUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMADDWD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMAXSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPMAXSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMAXSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMAXSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMAXUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPMAXUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMAXUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 (VPBLENDVB256 dst (VPMAXUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
-(VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask))
-(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask)
-(VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) => (VPSLLWMasked512constMerging dst [a] x mask)
-(VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) => (VPSLLDMasked512constMerging dst [a] x mask)
-(VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) => (VPADDSWMasked512Merging dst x y mask)
-(VPBLENDMQMasked512 dst (VPMOVSQB128 x) mask) => (VPMOVSQBMasked128Merging dst x mask)
-(VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) => (VPMINUDMasked512Merging dst x y mask)
+(VPBLENDVB256 dst (VPMINSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPMINSD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMINSQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMINSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMINUB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPMINUD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMINUQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMINUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMOVDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMOVDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMOVQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMOVSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMOVSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMOVSXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMOVSXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMOVWB128_256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMOVZXBW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPMOVZXDQ512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMOVZXWD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMULHUW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMULHW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPMULLD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPMULLQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPMULLW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPOPCNTB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPOPCNTD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPOPCNTQ256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask))
 (VPBLENDVB256 dst (VPOPCNTW256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM mask))
-(VPBLENDMQMasked512 dst (VRSQRT14PD512 x) mask) => (VRSQRT14PDMasked512Merging dst x mask)
-(VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) => (VSCALEFPSMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VPMAXUW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+(VPBLENDVB256 dst (VPROLD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPROLQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPROLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPROLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPRORD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPRORQ256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPRORVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPRORVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSHUFB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPSHUFD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSLLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPSLLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPSLLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPSLLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPSLLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSLLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSRAD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPSRAVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
-(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => (VSQRTPSMasked512Merging dst x mask)
-(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask)
-(VPBLENDVB128 dst (VPABSB128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM mask))
-(VPBLENDVB256 dst (VPABSB256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM mask))
-(VPBLENDVB128 dst (VPABSQ128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM mask))
-(VPBLENDVB256 dst (VPMOVDW128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMOVDWMasked128Merging dst x (VPMOVVec32x8ToM mask))
-(VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) => (VPMAXSQMasked512Merging dst x y mask)
-(VPBLENDVB128 dst (VSCALEFPD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM mask))
-(VPBLENDVB128 dst (VSQRTPS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM mask))
+(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 (VPBLENDVB256 dst (VPSUBSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
-(VPBLENDMDMasked512 dst (VPABSD512 x) mask) => (VPABSDMasked512Merging dst x mask)
-(VPBLENDVB128 dst (VPBROADCASTW512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM mask))
-(VPBLENDVB128 dst (VPMAXUB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
-(VPBLENDMDMasked512 dst (VMULPS512 x y) mask) => (VMULPSMasked512Merging dst x y mask)
-(VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) => (VPMULLWMasked512Merging dst x y mask)
+(VPBLENDVB256 dst (VPSUBSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSUBUSB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+(VPBLENDVB256 dst (VPSUBUSW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VPSUBW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+(VPBLENDVB256 dst (VRCP14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VRSQRT14PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VSCALEFPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VSCALEFPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VSQRTPD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VSQRTPS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM mask))
+(VPBLENDVB256 dst (VSUBPD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+(VPBLENDVB256 dst (VSUBPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 (VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD512load {sym} [off] ptr mem)
 (VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ128load {sym} [off] ptr mem)
 (VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ256load {sym} [off] ptr mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
index 4f22d8582b..4e4f4a4205 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@@ -585,37 +585,71 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPMINUWMasked128", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec128", resultInArg0: false},
 	{name: "VPMINUWMasked256", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec256", resultInArg0: false},
 	{name: "VPMINUWMasked512", argLength: 3, reg: w2kw, asm: "VPMINUW", commutative: true, typ: "Vec512", resultInArg0: false},
-	{name: "VPMOVDB128", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVDW128", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDB128_128", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDB128_256", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDB128_512", argLength: 1, reg: w11, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDW128_128", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDW128_256", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVDW256", argLength: 1, reg: w11, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVQB128", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVQD128", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQB128_128", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQB128_256", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQB128_512", argLength: 1, reg: w11, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQD128_128", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQD128_256", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVQD256", argLength: 1, reg: w11, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQDMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVQW128", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVSDB128", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVSDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVSDW128", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQW128_128", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQW128_256", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQW128_512", argLength: 1, reg: w11, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDB128_128", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDB128_256", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDB128_512", argLength: 1, reg: w11, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDW128_128", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDW128_256", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVSDW256", argLength: 1, reg: w11, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVSDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVSQB128", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVSQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVSQD128", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQB128_128", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQB128_256", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQB128_512", argLength: 1, reg: w11, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQD128_128", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQD128_256", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVSQD256", argLength: 1, reg: w11, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVSQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQDMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVSQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVSQW128", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVSQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVSWB128", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQW128_128", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQW128_256", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQW128_512", argLength: 1, reg: w11, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSWB128_128", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSWB128_256", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVSWB256", argLength: 1, reg: w11, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVSWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPMOVSXBD128", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVSXBD256", argLength: 1, reg: v11, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -653,27 +687,47 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPMOVSXWQMasked128", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVSXWQMasked256", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPMOVSXWQMasked512", argLength: 2, reg: wkw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: false},
-	{name: "VPMOVUSDB128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVUSDBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVUSDW128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDB128_128", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDB128_256", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDB128_512", argLength: 1, reg: w11, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDW128_128", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDW128_256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVUSDW256", argLength: 1, reg: w11, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVUSDWMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSDWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVUSDWMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVUSQB128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVUSQBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVUSQD128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQB128_128", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQB128_256", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQB128_512", argLength: 1, reg: w11, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQBMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQD128_128", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQD128_256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVUSQD256", argLength: 1, reg: w11, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVUSQDMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQDMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQDMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVUSQDMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVUSQW128", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVUSQWMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
-	{name: "VPMOVUSWB128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQW128_128", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQW128_256", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQW128_512", argLength: 1, reg: w11, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQWMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQWMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSQWMasked128_512", argLength: 2, reg: wkw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSWB128_128", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSWB128_256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVUSWB256", argLength: 1, reg: w11, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVUSWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVUSWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVUSWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVWB128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVWB128_128", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVWB128_256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVWB256", argLength: 1, reg: w11, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: false},
-	{name: "VPMOVWBMasked128", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVWBMasked128_128", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
+	{name: "VPMOVWBMasked128_256", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVWBMasked256", argLength: 2, reg: wkw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: false},
 	{name: "VPMOVZXBD128", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: false},
 	{name: "VPMOVZXBD256", argLength: 1, reg: v11, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -2064,21 +2118,38 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPMINUWMasked128Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMINUWMasked256Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPMINUWMasked512Merging", argLength: 4, reg: w3kw, asm: "VPMINUW", commutative: false, typ: "Vec512", resultInArg0: true},
-	{name: "VPMOVDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVDW", commutative: false, typ: "Vec256", resultInArg0: true},
-	{name: "VPMOVQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQD", commutative: false, typ: "Vec256", resultInArg0: true},
-	{name: "VPMOVQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSDW", commutative: false, typ: "Vec256", resultInArg0: true},
-	{name: "VPMOVSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQD", commutative: false, typ: "Vec256", resultInArg0: true},
-	{name: "VPMOVSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSWB", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPMOVSXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVSXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXBD", commutative: false, typ: "Vec256", resultInArg0: true},
@@ -2098,16 +2169,26 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
 	{name: "VPMOVSXWQMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVSXWQMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPMOVSXWQMasked512Merging", argLength: 3, reg: w2kw, asm: "VPMOVSXWQ", commutative: false, typ: "Vec512", resultInArg0: true},
-	{name: "VPMOVUSDBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVUSDWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSDBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSDBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSDBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSDWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSDWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVUSDWMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSDW", commutative: false, typ: "Vec256", resultInArg0: true},
-	{name: "VPMOVUSQBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVUSQDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSQBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSQBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSQBMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSQDMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSQDMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVUSQDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQD", commutative: false, typ: "Vec256", resultInArg0: true},
-	{name: "VPMOVUSQWMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
-	{name: "VPMOVUSWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSQWMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSQWMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSQWMasked128_512Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSQW", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVUSWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVUSWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVUSWB", commutative: false, typ: "Vec256", resultInArg0: true},
-	{name: "VPMOVWBMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVWBMasked128_128Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
+	{name: "VPMOVWBMasked128_256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVWBMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVWB", commutative: false, typ: "Vec256", resultInArg0: true},
 	{name: "VPMOVZXBDMasked128Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec128", resultInArg0: true},
 	{name: "VPMOVZXBDMasked256Merging", argLength: 3, reg: w2kw, asm: "VPMOVZXBD", commutative: false, typ: "Vec256", resultInArg0: true},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 4dd7faeebf..1d3875a9be 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1826,37 +1826,71 @@ const (
 	OpAMD64VPMINUWMasked128
 	OpAMD64VPMINUWMasked256
 	OpAMD64VPMINUWMasked512
-	OpAMD64VPMOVDB128
-	OpAMD64VPMOVDBMasked128
-	OpAMD64VPMOVDW128
+	OpAMD64VPMOVDB128_128
+	OpAMD64VPMOVDB128_256
+	OpAMD64VPMOVDB128_512
+	OpAMD64VPMOVDBMasked128_128
+	OpAMD64VPMOVDBMasked128_256
+	OpAMD64VPMOVDBMasked128_512
+	OpAMD64VPMOVDW128_128
+	OpAMD64VPMOVDW128_256
 	OpAMD64VPMOVDW256
-	OpAMD64VPMOVDWMasked128
+	OpAMD64VPMOVDWMasked128_128
+	OpAMD64VPMOVDWMasked128_256
 	OpAMD64VPMOVDWMasked256
-	OpAMD64VPMOVQB128
-	OpAMD64VPMOVQBMasked128
-	OpAMD64VPMOVQD128
+	OpAMD64VPMOVQB128_128
+	OpAMD64VPMOVQB128_256
+	OpAMD64VPMOVQB128_512
+	OpAMD64VPMOVQBMasked128_128
+	OpAMD64VPMOVQBMasked128_256
+	OpAMD64VPMOVQBMasked128_512
+	OpAMD64VPMOVQD128_128
+	OpAMD64VPMOVQD128_256
 	OpAMD64VPMOVQD256
-	OpAMD64VPMOVQDMasked128
+	OpAMD64VPMOVQDMasked128_128
+	OpAMD64VPMOVQDMasked128_256
 	OpAMD64VPMOVQDMasked256
-	OpAMD64VPMOVQW128
-	OpAMD64VPMOVQWMasked128
-	OpAMD64VPMOVSDB128
-	OpAMD64VPMOVSDBMasked128
-	OpAMD64VPMOVSDW128
+	OpAMD64VPMOVQW128_128
+	OpAMD64VPMOVQW128_256
+	OpAMD64VPMOVQW128_512
+	OpAMD64VPMOVQWMasked128_128
+	OpAMD64VPMOVQWMasked128_256
+	OpAMD64VPMOVQWMasked128_512
+	OpAMD64VPMOVSDB128_128
+	OpAMD64VPMOVSDB128_256
+	OpAMD64VPMOVSDB128_512
+	OpAMD64VPMOVSDBMasked128_128
+	OpAMD64VPMOVSDBMasked128_256
+	OpAMD64VPMOVSDBMasked128_512
+	OpAMD64VPMOVSDW128_128
+	OpAMD64VPMOVSDW128_256
 	OpAMD64VPMOVSDW256
-	OpAMD64VPMOVSDWMasked128
+	OpAMD64VPMOVSDWMasked128_128
+	OpAMD64VPMOVSDWMasked128_256
 	OpAMD64VPMOVSDWMasked256
-	OpAMD64VPMOVSQB128
-	OpAMD64VPMOVSQBMasked128
-	OpAMD64VPMOVSQD128
+	OpAMD64VPMOVSQB128_128
+	OpAMD64VPMOVSQB128_256
+	OpAMD64VPMOVSQB128_512
+	OpAMD64VPMOVSQBMasked128_128
+	OpAMD64VPMOVSQBMasked128_256
+	OpAMD64VPMOVSQBMasked128_512
+	OpAMD64VPMOVSQD128_128
+	OpAMD64VPMOVSQD128_256
 	OpAMD64VPMOVSQD256
-	OpAMD64VPMOVSQDMasked128
+	OpAMD64VPMOVSQDMasked128_128
+	OpAMD64VPMOVSQDMasked128_256
 	OpAMD64VPMOVSQDMasked256
-	OpAMD64VPMOVSQW128
-	OpAMD64VPMOVSQWMasked128
-	OpAMD64VPMOVSWB128
+	OpAMD64VPMOVSQW128_128
+	OpAMD64VPMOVSQW128_256
+	OpAMD64VPMOVSQW128_512
+	OpAMD64VPMOVSQWMasked128_128
+	OpAMD64VPMOVSQWMasked128_256
+	OpAMD64VPMOVSQWMasked128_512
+	OpAMD64VPMOVSWB128_128
+	OpAMD64VPMOVSWB128_256
 	OpAMD64VPMOVSWB256
-	OpAMD64VPMOVSWBMasked128
+	OpAMD64VPMOVSWBMasked128_128
+	OpAMD64VPMOVSWBMasked128_256
 	OpAMD64VPMOVSWBMasked256
 	OpAMD64VPMOVSXBD128
 	OpAMD64VPMOVSXBD256
@@ -1894,27 +1928,47 @@ const (
 	OpAMD64VPMOVSXWQMasked128
 	OpAMD64VPMOVSXWQMasked256
 	OpAMD64VPMOVSXWQMasked512
-	OpAMD64VPMOVUSDB128
-	OpAMD64VPMOVUSDBMasked128
-	OpAMD64VPMOVUSDW128
+	OpAMD64VPMOVUSDB128_128
+	OpAMD64VPMOVUSDB128_256
+	OpAMD64VPMOVUSDB128_512
+	OpAMD64VPMOVUSDBMasked128_128
+	OpAMD64VPMOVUSDBMasked128_256
+	OpAMD64VPMOVUSDBMasked128_512
+	OpAMD64VPMOVUSDW128_128
+	OpAMD64VPMOVUSDW128_256
 	OpAMD64VPMOVUSDW256
-	OpAMD64VPMOVUSDWMasked128
+	OpAMD64VPMOVUSDWMasked128_128
+	OpAMD64VPMOVUSDWMasked128_256
 	OpAMD64VPMOVUSDWMasked256
-	OpAMD64VPMOVUSQB128
-	OpAMD64VPMOVUSQBMasked128
-	OpAMD64VPMOVUSQD128
+	OpAMD64VPMOVUSQB128_128
+	OpAMD64VPMOVUSQB128_256
+	OpAMD64VPMOVUSQB128_512
+	OpAMD64VPMOVUSQBMasked128_128
+	OpAMD64VPMOVUSQBMasked128_256
+	OpAMD64VPMOVUSQBMasked128_512
+	OpAMD64VPMOVUSQD128_128
+	OpAMD64VPMOVUSQD128_256
 	OpAMD64VPMOVUSQD256
-	OpAMD64VPMOVUSQDMasked128
+	OpAMD64VPMOVUSQDMasked128_128
+	OpAMD64VPMOVUSQDMasked128_256
 	OpAMD64VPMOVUSQDMasked256
-	OpAMD64VPMOVUSQW128
-	OpAMD64VPMOVUSQWMasked128
-	OpAMD64VPMOVUSWB128
+	OpAMD64VPMOVUSQW128_128
+	OpAMD64VPMOVUSQW128_256
+	OpAMD64VPMOVUSQW128_512
+	OpAMD64VPMOVUSQWMasked128_128
+	OpAMD64VPMOVUSQWMasked128_256
+	OpAMD64VPMOVUSQWMasked128_512
+	OpAMD64VPMOVUSWB128_128
+	OpAMD64VPMOVUSWB128_256
 	OpAMD64VPMOVUSWB256
-	OpAMD64VPMOVUSWBMasked128
+	OpAMD64VPMOVUSWBMasked128_128
+	OpAMD64VPMOVUSWBMasked128_256
 	OpAMD64VPMOVUSWBMasked256
-	OpAMD64VPMOVWB128
+	OpAMD64VPMOVWB128_128
+	OpAMD64VPMOVWB128_256
 	OpAMD64VPMOVWB256
-	OpAMD64VPMOVWBMasked128
+	OpAMD64VPMOVWBMasked128_128
+	OpAMD64VPMOVWBMasked128_256
 	OpAMD64VPMOVWBMasked256
 	OpAMD64VPMOVZXBD128
 	OpAMD64VPMOVZXBD256
@@ -3305,21 +3359,38 @@ const (
 	OpAMD64VPMINUWMasked128Merging
 	OpAMD64VPMINUWMasked256Merging
 	OpAMD64VPMINUWMasked512Merging
-	OpAMD64VPMOVDBMasked128Merging
-	OpAMD64VPMOVDWMasked128Merging
+	OpAMD64VPMOVDBMasked128_128Merging
+	OpAMD64VPMOVDBMasked128_256Merging
+	OpAMD64VPMOVDBMasked128_512Merging
+	OpAMD64VPMOVDWMasked128_128Merging
+	OpAMD64VPMOVDWMasked128_256Merging
 	OpAMD64VPMOVDWMasked256Merging
-	OpAMD64VPMOVQBMasked128Merging
-	OpAMD64VPMOVQDMasked128Merging
+	OpAMD64VPMOVQBMasked128_128Merging
+	OpAMD64VPMOVQBMasked128_256Merging
+	OpAMD64VPMOVQBMasked128_512Merging
+	OpAMD64VPMOVQDMasked128_128Merging
+	OpAMD64VPMOVQDMasked128_256Merging
 	OpAMD64VPMOVQDMasked256Merging
-	OpAMD64VPMOVQWMasked128Merging
-	OpAMD64VPMOVSDBMasked128Merging
-	OpAMD64VPMOVSDWMasked128Merging
+	OpAMD64VPMOVQWMasked128_128Merging
+	OpAMD64VPMOVQWMasked128_256Merging
+	OpAMD64VPMOVQWMasked128_512Merging
+	OpAMD64VPMOVSDBMasked128_128Merging
+	OpAMD64VPMOVSDBMasked128_256Merging
+	OpAMD64VPMOVSDBMasked128_512Merging
+	OpAMD64VPMOVSDWMasked128_128Merging
+	OpAMD64VPMOVSDWMasked128_256Merging
 	OpAMD64VPMOVSDWMasked256Merging
-	OpAMD64VPMOVSQBMasked128Merging
-	OpAMD64VPMOVSQDMasked128Merging
+	OpAMD64VPMOVSQBMasked128_128Merging
+	OpAMD64VPMOVSQBMasked128_256Merging
+	OpAMD64VPMOVSQBMasked128_512Merging
+	OpAMD64VPMOVSQDMasked128_128Merging
+	OpAMD64VPMOVSQDMasked128_256Merging
 	OpAMD64VPMOVSQDMasked256Merging
-	OpAMD64VPMOVSQWMasked128Merging
-	OpAMD64VPMOVSWBMasked128Merging
+	OpAMD64VPMOVSQWMasked128_128Merging
+	OpAMD64VPMOVSQWMasked128_256Merging
+	OpAMD64VPMOVSQWMasked128_512Merging
+	OpAMD64VPMOVSWBMasked128_128Merging
+	OpAMD64VPMOVSWBMasked128_256Merging
 	OpAMD64VPMOVSWBMasked256Merging
 	OpAMD64VPMOVSXBDMasked128Merging
 	OpAMD64VPMOVSXBDMasked256Merging
@@ -3339,16 +3410,26 @@ const (
 	OpAMD64VPMOVSXWQMasked128Merging
 	OpAMD64VPMOVSXWQMasked256Merging
 	OpAMD64VPMOVSXWQMasked512Merging
-	OpAMD64VPMOVUSDBMasked128Merging
-	OpAMD64VPMOVUSDWMasked128Merging
+	OpAMD64VPMOVUSDBMasked128_128Merging
+	OpAMD64VPMOVUSDBMasked128_256Merging
+	OpAMD64VPMOVUSDBMasked128_512Merging
+	OpAMD64VPMOVUSDWMasked128_128Merging
+	OpAMD64VPMOVUSDWMasked128_256Merging
 	OpAMD64VPMOVUSDWMasked256Merging
-	OpAMD64VPMOVUSQBMasked128Merging
-	OpAMD64VPMOVUSQDMasked128Merging
+	OpAMD64VPMOVUSQBMasked128_128Merging
+	OpAMD64VPMOVUSQBMasked128_256Merging
+	OpAMD64VPMOVUSQBMasked128_512Merging
+	OpAMD64VPMOVUSQDMasked128_128Merging
+	OpAMD64VPMOVUSQDMasked128_256Merging
 	OpAMD64VPMOVUSQDMasked256Merging
-	OpAMD64VPMOVUSQWMasked128Merging
-	OpAMD64VPMOVUSWBMasked128Merging
+	OpAMD64VPMOVUSQWMasked128_128Merging
+	OpAMD64VPMOVUSQWMasked128_256Merging
+	OpAMD64VPMOVUSQWMasked128_512Merging
+	OpAMD64VPMOVUSWBMasked128_128Merging
+	OpAMD64VPMOVUSWBMasked128_256Merging
 	OpAMD64VPMOVUSWBMasked256Merging
-	OpAMD64VPMOVWBMasked128Merging
+	OpAMD64VPMOVWBMasked128_128Merging
+	OpAMD64VPMOVWBMasked128_256Merging
 	OpAMD64VPMOVWBMasked256Merging
 	OpAMD64VPMOVZXBDMasked128Merging
 	OpAMD64VPMOVZXBDMasked256Merging
@@ -29124,7 +29205,7 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:   "VPMOVDB128",
+		name:   "VPMOVDB128_128",
 		argLen: 1,
 		asm:    x86.AVPMOVDB,
 		reg: regInfo{
@@ -29137,7 +29218,61 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:   "VPMOVDBMasked128",
+		name:   "VPMOVDB128_256",
+		argLen: 1,
+		asm:    x86.AVPMOVDB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+			outputs: []outputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+		},
+	},
+	{
+		name:   "VPMOVDB128_512",
+		argLen: 1,
+		asm:    x86.AVPMOVDB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+			outputs: []outputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+		},
+	},
+	{
+		name:   "VPMOVDBMasked128_128",
+		argLen: 2,
+		asm:    x86.AVPMOVDB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+			outputs: []outputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+		},
+	},
+	{
+		name:   "VPMOVDBMasked128_256",
+		argLen: 2,
+		asm:    x86.AVPMOVDB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+			outputs: []outputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+		},
+	},
+	{
+		name:   "VPMOVDBMasked128_512",
 		argLen: 2,
 		asm:    x86.AVPMOVDB,
 		reg: regInfo{
@@ -29151,7 +29286,20 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:   "VPMOVDW128",
+		name:   "VPMOVDW128_128",
+		argLen: 1,
+		asm:    x86.AVPMOVDW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+			outputs: []outputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+		},
+	},
+	{
+		name:   "VPMOVDW128_256",
 		argLen: 1,
 		asm:    x86.AVPMOVDW,
 		reg: regInfo{
@@ -29177,7 +29325,21 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:   "VPMOVDWMasked128",
+		name:   "VPMOVDWMasked128_128",
+		argLen: 2,
+		asm:    x86.AVPMOVDW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+				{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+			outputs: []outputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+		},
+	},
+	{
+		name:   "VPMOVDWMasked128_256",
 		argLen: 2,
 		asm:    x86.AVPMOVDW,
 		reg: regInfo{
@@ -29205,7 +29367,7 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:   "VPMOVQB128",
+		name:   "VPMOVQB128_128",
 		argLen: 1,
 		asm:    x86.AVPMOVQB,
 		reg: regInfo{
@@ -29218,7 +29380,61 @@ var opcodeTable = [...]opInfo{
 		},
 	},
 	{
-		name:   "VPMOVQBMasked128",
+		name:   "VPMOVQB128_256",
+		argLen: 1,
+		asm:    x86.AVPMOVQB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+			outputs: []outputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+		},
+	},
+	{
+		name:   "VPMOVQB128_512",
+		argLen: 1,
+		asm:    x86.AVPMOVQB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+			},
+			outputs: []outputInfo{
+				{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29
X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128_128", + argLen: 2, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128_256", + argLen: 2, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128_512", argLen: 2, asm: x86.AVPMOVQB, reg: regInfo{ @@ -29232,7 +29448,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQD128", + name: "VPMOVQD128_128", + argLen: 1, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQD128_256", argLen: 1, asm: x86.AVPMOVQD, reg: regInfo{ @@ -29258,7 +29487,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQDMasked128", + name: "VPMOVQDMasked128_128", + argLen: 2, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQDMasked128_256", argLen: 2, asm: x86.AVPMOVQD, reg: regInfo{ @@ -29286,7 +29529,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQW128", + name: "VPMOVQW128_128", argLen: 1, asm: x86.AVPMOVQW, reg: regInfo{ @@ -29299,7 +29542,33 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQWMasked128", + name: "VPMOVQW128_256", + argLen: 1, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQW128_512", + argLen: 1, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQWMasked128_128", argLen: 2, asm: x86.AVPMOVQW, reg: regInfo{ @@ -29313,7 +29582,61 @@ var opcodeTable = 
[...]opInfo{ }, }, { - name: "VPMOVSDB128", + name: "VPMOVQWMasked128_256", + argLen: 2, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQWMasked128_512", + argLen: 2, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDB128_128", + argLen: 1, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDB128_256", + argLen: 1, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDB128_512", argLen: 1, asm: x86.AVPMOVSDB, reg: regInfo{ @@ -29326,7 +29649,35 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDBMasked128", + name: "VPMOVSDBMasked128_128", + argLen: 2, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDBMasked128_256", + argLen: 2, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDBMasked128_512", argLen: 2, asm: x86.AVPMOVSDB, reg: regInfo{ @@ -29340,7 +29691,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDW128", + name: "VPMOVSDW128_128", + argLen: 1, + asm: x86.AVPMOVSDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 
X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDW128_256", argLen: 1, asm: x86.AVPMOVSDW, reg: regInfo{ @@ -29366,7 +29730,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDWMasked128", + name: "VPMOVSDWMasked128_128", + argLen: 2, + asm: x86.AVPMOVSDW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDWMasked128_256", argLen: 2, asm: x86.AVPMOVSDW, reg: regInfo{ @@ -29394,7 +29772,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQB128", + name: "VPMOVSQB128_128", argLen: 1, asm: x86.AVPMOVSQB, reg: regInfo{ @@ -29407,7 +29785,61 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQBMasked128", + name: "VPMOVSQB128_256", + argLen: 1, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQB128_512", + argLen: 1, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_128", + argLen: 2, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_256", + argLen: 2, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_512", argLen: 2, asm: x86.AVPMOVSQB, reg: regInfo{ @@ -29421,7 +29853,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQD128", + name: "VPMOVSQD128_128", + argLen: 1, + asm: x86.AVPMOVSQD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQD128_256", argLen: 1, asm: 
x86.AVPMOVSQD, reg: regInfo{ @@ -29447,7 +29892,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQDMasked128", + name: "VPMOVSQDMasked128_128", + argLen: 2, + asm: x86.AVPMOVSQD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQDMasked128_256", argLen: 2, asm: x86.AVPMOVSQD, reg: regInfo{ @@ -29475,7 +29934,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQW128", + name: "VPMOVSQW128_128", argLen: 1, asm: x86.AVPMOVSQW, reg: regInfo{ @@ -29488,7 +29947,47 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQWMasked128", + name: "VPMOVSQW128_256", + argLen: 1, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQW128_512", + argLen: 1, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQWMasked128_128", + argLen: 2, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQWMasked128_256", argLen: 2, asm: x86.AVPMOVSQW, reg: regInfo{ @@ -29502,7 +30001,34 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSWB128", + name: "VPMOVSQWMasked128_512", + argLen: 2, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWB128_128", + argLen: 1, + asm: x86.AVPMOVSWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWB128_256", argLen: 1, asm: x86.AVPMOVSWB, reg: regInfo{ @@ -29528,7 +30054,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSWBMasked128", + 
name: "VPMOVSWBMasked128_128", + argLen: 2, + asm: x86.AVPMOVSWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWBMasked128_256", argLen: 2, asm: x86.AVPMOVSWB, reg: regInfo{ @@ -30042,7 +30582,33 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDB128", + name: "VPMOVUSDB128_128", + argLen: 1, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDB128_256", + argLen: 1, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDB128_512", argLen: 1, asm: x86.AVPMOVUSDB, reg: regInfo{ @@ -30055,7 +30621,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDBMasked128", + name: "VPMOVUSDBMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_256", argLen: 2, asm: x86.AVPMOVUSDB, reg: regInfo{ @@ -30069,7 +30649,34 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDW128", + name: "VPMOVUSDBMasked128_512", + argLen: 2, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDW128_128", + argLen: 1, + asm: x86.AVPMOVUSDW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDW128_256", argLen: 1, asm: x86.AVPMOVUSDW, reg: regInfo{ @@ -30095,7 +30702,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDWMasked128", + name: "VPMOVUSDWMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSDW, + reg: regInfo{ + inputs: 
[]inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDWMasked128_256", argLen: 2, asm: x86.AVPMOVUSDW, reg: regInfo{ @@ -30123,7 +30744,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQB128", + name: "VPMOVUSQB128_128", + argLen: 1, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQB128_256", argLen: 1, asm: x86.AVPMOVUSQB, reg: regInfo{ @@ -30136,7 +30770,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQBMasked128", + name: "VPMOVUSQB128_512", + argLen: 1, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_128", argLen: 2, asm: x86.AVPMOVUSQB, reg: regInfo{ @@ -30150,7 +30797,48 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQD128", + name: "VPMOVUSQBMasked128_256", + argLen: 2, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_512", + argLen: 2, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQD128_128", + argLen: 1, + asm: x86.AVPMOVUSQD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQD128_256", argLen: 1, asm: x86.AVPMOVUSQD, reg: regInfo{ @@ -30176,7 +30864,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQDMasked128", + name: "VPMOVUSQDMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSQD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 
X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQDMasked128_256", argLen: 2, asm: x86.AVPMOVUSQD, reg: regInfo{ @@ -30204,7 +30906,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQW128", + name: "VPMOVUSQW128_128", + argLen: 1, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQW128_256", argLen: 1, asm: x86.AVPMOVUSQW, reg: regInfo{ @@ -30217,7 +30932,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQWMasked128", + name: "VPMOVUSQW128_512", + argLen: 1, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQWMasked128_128", argLen: 2, asm: x86.AVPMOVUSQW, reg: regInfo{ @@ -30231,7 +30959,48 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSWB128", + name: "VPMOVUSQWMasked128_256", + argLen: 2, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQWMasked128_512", + argLen: 2, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWB128_128", + argLen: 1, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWB128_256", argLen: 1, asm: x86.AVPMOVUSWB, reg: regInfo{ @@ -30257,7 +31026,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSWBMasked128", + name: "VPMOVUSWBMasked128_128", + argLen: 2, + asm: x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 
X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWBMasked128_256", argLen: 2, asm: x86.AVPMOVUSWB, reg: regInfo{ @@ -30285,7 +31068,20 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVWB128", + name: "VPMOVWB128_128", + argLen: 1, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVWB128_256", argLen: 1, asm: x86.AVPMOVWB, reg: regInfo{ @@ -30311,7 +31107,21 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVWBMasked128", + name: "VPMOVWBMasked128_128", + argLen: 2, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVWBMasked128_256", argLen: 2, asm: x86.AVPMOVWB, reg: regInfo{ @@ -52247,7 +53057,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVDBMasked128Merging", + name: "VPMOVDBMasked128_128Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVDB, @@ -52263,7 +53073,55 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVDWMasked128Merging", + name: "VPMOVDBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 
X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVDWMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVDW, @@ -52295,7 +53153,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQBMasked128Merging", + name: "VPMOVQBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQBMasked128_512Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVQB, @@ -52311,7 +53201,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQDMasked128Merging", + name: "VPMOVQDMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQDMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVQD, @@ -52343,7 +53249,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVQWMasked128Merging", + name: "VPMOVQWMasked128_128Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVQW, @@ -52359,7 +53265,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDBMasked128Merging", + name: "VPMOVQWMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 
X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVQWMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDBMasked128_128Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSDB, @@ -52375,7 +53313,55 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSDWMasked128Merging", + name: "VPMOVSDBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSDWMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSDW, @@ -52407,7 +53393,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQBMasked128Merging", + name: "VPMOVSQBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 
X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSQB, @@ -52423,7 +53425,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQDMasked128Merging", + name: "VPMOVSQBMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQDMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQDMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSQD, @@ -52455,7 +53489,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSQWMasked128Merging", + name: "VPMOVSQWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSQWMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSQW, @@ -52471,7 +53521,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVSWBMasked128Merging", + name: "VPMOVSQWMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVSWB, + reg: regInfo{ + inputs: 
[]inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVSWBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVSWB, @@ -52791,7 +53873,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDBMasked128Merging", + name: "VPMOVUSDBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDBMasked128_512Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSDB, @@ -52807,7 +53921,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSDWMasked128Merging", + name: "VPMOVUSDWMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSDW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSDWMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSDW, @@ -52839,7 +53969,39 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQBMasked128Merging", + name: "VPMOVUSQBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 
X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQBMasked128_512Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSQB, @@ -52855,7 +54017,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQDMasked128Merging", + name: "VPMOVUSQDMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQD, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQDMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSQD, @@ -52887,7 +54065,7 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSQWMasked128Merging", + name: "VPMOVUSQWMasked128_128Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSQW, @@ -52903,7 +54081,55 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVUSWBMasked128Merging", + name: "VPMOVUSQWMasked128_256Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSQWMasked128_512Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVUSQW, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: 
x86.AVPMOVUSWB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVUSWBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVUSWB, @@ -52935,7 +54161,23 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPMOVWBMasked128Merging", + name: "VPMOVWBMasked128_128Merging", + argLen: 3, + resultInArg0: true, + asm: x86.AVPMOVWB, + reg: regInfo{ + inputs: []inputInfo{ + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPMOVWBMasked128_256Merging", argLen: 3, resultInArg0: true, asm: x86.AVPMOVWB, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index bf0e79de0b..974af9d842 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2574,19 +2574,19 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVDW256 return true case OpConvertToInt16Int32x4: - v.Op = OpAMD64VPMOVDW128 + v.Op = OpAMD64VPMOVDW128_128 return true case OpConvertToInt16Int32x8: - v.Op = OpAMD64VPMOVDW128 + v.Op = OpAMD64VPMOVDW128_256 return true case OpConvertToInt16Int64x2: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_128 return true case OpConvertToInt16Int64x4: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_256 return true case OpConvertToInt16Int64x8: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_512 return true case OpConvertToInt16Int8x16: v.Op = OpAMD64VPMOVSXBW256 @@ -2598,19 +2598,19 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVSDW256 return true case OpConvertToInt16SaturatedInt32x4: - v.Op = OpAMD64VPMOVSDW128 + v.Op = OpAMD64VPMOVSDW128_128 return true case OpConvertToInt16SaturatedInt32x8: - v.Op = OpAMD64VPMOVSDW128 + v.Op = OpAMD64VPMOVSDW128_256 return true case OpConvertToInt16SaturatedInt64x2: - v.Op = OpAMD64VPMOVSQW128 + v.Op = OpAMD64VPMOVSQW128_128 return true case OpConvertToInt16SaturatedInt64x4: - v.Op = OpAMD64VPMOVSQW128 + v.Op = OpAMD64VPMOVSQW128_256 return true case OpConvertToInt16SaturatedInt64x8: - v.Op = OpAMD64VPMOVSQW128 + v.Op = OpAMD64VPMOVSQW128_512 return true case OpConvertToInt16SaturatedPackedInt32x16: v.Op = OpAMD64VPACKSSDW512 @@ -2640,10 +2640,10 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVSXWD256 return true case OpConvertToInt32Int64x2: - v.Op = OpAMD64VPMOVQD128 + v.Op = OpAMD64VPMOVQD128_128 return true case OpConvertToInt32Int64x4: - v.Op = OpAMD64VPMOVQD128 + v.Op = OpAMD64VPMOVQD128_256 return true case OpConvertToInt32Int64x8: v.Op = OpAMD64VPMOVQD256 @@ -2652,10 +2652,10 @@ func 
rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVSXBD512 return true case OpConvertToInt32SaturatedInt64x2: - v.Op = OpAMD64VPMOVSQD128 + v.Op = OpAMD64VPMOVSQD128_128 return true case OpConvertToInt32SaturatedInt64x4: - v.Op = OpAMD64VPMOVSQD128 + v.Op = OpAMD64VPMOVSQD128_256 return true case OpConvertToInt32SaturatedInt64x8: v.Op = OpAMD64VPMOVSQD256 @@ -2694,58 +2694,58 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVSXBQ512 return true case OpConvertToInt8Int16x16: - v.Op = OpAMD64VPMOVWB128 + v.Op = OpAMD64VPMOVWB128_256 return true case OpConvertToInt8Int16x32: v.Op = OpAMD64VPMOVWB256 return true case OpConvertToInt8Int16x8: - v.Op = OpAMD64VPMOVWB128 + v.Op = OpAMD64VPMOVWB128_128 return true case OpConvertToInt8Int32x16: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_512 return true case OpConvertToInt8Int32x4: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_128 return true case OpConvertToInt8Int32x8: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_256 return true case OpConvertToInt8Int64x2: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_128 return true case OpConvertToInt8Int64x4: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_256 return true case OpConvertToInt8Int64x8: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_512 return true case OpConvertToInt8SaturatedInt16x16: - v.Op = OpAMD64VPMOVSWB128 + v.Op = OpAMD64VPMOVSWB128_256 return true case OpConvertToInt8SaturatedInt16x32: v.Op = OpAMD64VPMOVSWB256 return true case OpConvertToInt8SaturatedInt16x8: - v.Op = OpAMD64VPMOVSWB128 + v.Op = OpAMD64VPMOVSWB128_128 return true case OpConvertToInt8SaturatedInt32x16: - v.Op = OpAMD64VPMOVSDB128 + v.Op = OpAMD64VPMOVSDB128_512 return true case OpConvertToInt8SaturatedInt32x4: - v.Op = OpAMD64VPMOVSDB128 + v.Op = OpAMD64VPMOVSDB128_128 return true case OpConvertToInt8SaturatedInt32x8: - v.Op = OpAMD64VPMOVSDB128 + v.Op = OpAMD64VPMOVSDB128_256 return true case OpConvertToInt8SaturatedInt64x2: - v.Op = OpAMD64VPMOVSQB128 + v.Op = OpAMD64VPMOVSQB128_128 return true case OpConvertToInt8SaturatedInt64x4: - v.Op = OpAMD64VPMOVSQB128 + v.Op = OpAMD64VPMOVSQB128_256 return true case OpConvertToInt8SaturatedInt64x8: - v.Op = OpAMD64VPMOVSQB128 + v.Op = OpAMD64VPMOVSQB128_512 return true case OpConvertToUint16SaturatedPackedUint32x16: v.Op = OpAMD64VPACKUSDW512 @@ -2760,37 +2760,37 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVUSDW256 return true case OpConvertToUint16SaturatedUint32x4: - v.Op = OpAMD64VPMOVUSDW128 + v.Op = OpAMD64VPMOVUSDW128_128 return true case OpConvertToUint16SaturatedUint32x8: - v.Op = OpAMD64VPMOVUSDW128 + v.Op = OpAMD64VPMOVUSDW128_256 return true case OpConvertToUint16SaturatedUint64x2: - v.Op = OpAMD64VPMOVUSQW128 + v.Op = OpAMD64VPMOVUSQW128_128 return true case OpConvertToUint16SaturatedUint64x4: - v.Op = OpAMD64VPMOVUSQW128 + v.Op = OpAMD64VPMOVUSQW128_256 return true case OpConvertToUint16SaturatedUint64x8: - v.Op = OpAMD64VPMOVUSQW128 + v.Op = OpAMD64VPMOVUSQW128_512 return true case OpConvertToUint16Uint32x16: v.Op = OpAMD64VPMOVDW256 return true case OpConvertToUint16Uint32x4: - v.Op = OpAMD64VPMOVDW128 + v.Op = OpAMD64VPMOVDW128_128 return true case OpConvertToUint16Uint32x8: - v.Op = OpAMD64VPMOVDW128 + v.Op = OpAMD64VPMOVDW128_256 return true case OpConvertToUint16Uint64x2: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_128 return true case OpConvertToUint16Uint64x4: - v.Op = OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_256 return true case OpConvertToUint16Uint64x8: - v.Op 
= OpAMD64VPMOVQW128 + v.Op = OpAMD64VPMOVQW128_512 return true case OpConvertToUint16Uint8x16: v.Op = OpAMD64VPMOVZXBW256 @@ -2811,10 +2811,10 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VCVTPS2UDQ256 return true case OpConvertToUint32SaturatedUint64x2: - v.Op = OpAMD64VPMOVUSQD128 + v.Op = OpAMD64VPMOVUSQD128_128 return true case OpConvertToUint32SaturatedUint64x4: - v.Op = OpAMD64VPMOVUSQD128 + v.Op = OpAMD64VPMOVUSQD128_256 return true case OpConvertToUint32SaturatedUint64x8: v.Op = OpAMD64VPMOVUSQD256 @@ -2826,10 +2826,10 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVZXWD256 return true case OpConvertToUint32Uint64x2: - v.Op = OpAMD64VPMOVQD128 + v.Op = OpAMD64VPMOVQD128_128 return true case OpConvertToUint32Uint64x4: - v.Op = OpAMD64VPMOVQD128 + v.Op = OpAMD64VPMOVQD128_256 return true case OpConvertToUint32Uint64x8: v.Op = OpAMD64VPMOVQD256 @@ -2877,58 +2877,58 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPMOVZXBQ512 return true case OpConvertToUint8SaturatedUint16x16: - v.Op = OpAMD64VPMOVUSWB128 + v.Op = OpAMD64VPMOVUSWB128_256 return true case OpConvertToUint8SaturatedUint16x32: v.Op = OpAMD64VPMOVUSWB256 return true case OpConvertToUint8SaturatedUint16x8: - v.Op = OpAMD64VPMOVUSWB128 + v.Op = OpAMD64VPMOVUSWB128_128 return true case OpConvertToUint8SaturatedUint32x16: - v.Op = OpAMD64VPMOVUSDB128 + v.Op = OpAMD64VPMOVUSDB128_512 return true case OpConvertToUint8SaturatedUint32x4: - v.Op = OpAMD64VPMOVUSDB128 + v.Op = OpAMD64VPMOVUSDB128_128 return true case OpConvertToUint8SaturatedUint32x8: - v.Op = OpAMD64VPMOVUSDB128 + v.Op = OpAMD64VPMOVUSDB128_256 return true case OpConvertToUint8SaturatedUint64x2: - v.Op = OpAMD64VPMOVUSQB128 + v.Op = OpAMD64VPMOVUSQB128_128 return true case OpConvertToUint8SaturatedUint64x4: - v.Op = OpAMD64VPMOVUSQB128 + v.Op = OpAMD64VPMOVUSQB128_256 return true case OpConvertToUint8SaturatedUint64x8: - v.Op = OpAMD64VPMOVUSQB128 + v.Op = OpAMD64VPMOVUSQB128_512 return true case OpConvertToUint8Uint16x16: - v.Op = OpAMD64VPMOVWB128 + v.Op = OpAMD64VPMOVWB128_256 return true case OpConvertToUint8Uint16x32: v.Op = OpAMD64VPMOVWB256 return true case OpConvertToUint8Uint16x8: - v.Op = OpAMD64VPMOVWB128 + v.Op = OpAMD64VPMOVWB128_128 return true case OpConvertToUint8Uint32x16: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_512 return true case OpConvertToUint8Uint32x4: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_128 return true case OpConvertToUint8Uint32x8: - v.Op = OpAMD64VPMOVDB128 + v.Op = OpAMD64VPMOVDB128_256 return true case OpConvertToUint8Uint64x2: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_128 return true case OpConvertToUint8Uint64x4: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_256 return true case OpConvertToUint8Uint64x8: - v.Op = OpAMD64VPMOVQB128 + v.Op = OpAMD64VPMOVQB128_512 return true case OpCopySignInt16x16: v.Op = OpAMD64VPSIGNW256 @@ -31243,27 +31243,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked128 (VPMOVWB128 x) mask) - // result: (VPMOVWBMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVWB128_128 x) mask) + // result: (VPMOVWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVWB128 { + if v_0.Op != OpAMD64VPMOVWB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVWBMasked128) + v.reset(OpAMD64VPMOVWBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked128 (VPMOVSWB128 x) mask) - // result: (VPMOVSWBMasked128 x mask) + // 
match: (VMOVDQU16Masked128 (VPMOVSWB128_128 x) mask) + // result: (VPMOVSWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSWB128 { + if v_0.Op != OpAMD64VPMOVSWB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSWBMasked128) + v.reset(OpAMD64VPMOVSWBMasked128_128) v.AddArg2(x, mask) return true } @@ -31291,15 +31291,15 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU16Masked128 (VPMOVUSWB128 x) mask) - // result: (VPMOVUSWBMasked128 x mask) + // match: (VMOVDQU16Masked128 (VPMOVUSWB128_128 x) mask) + // result: (VPMOVUSWBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSWB128 { + if v_0.Op != OpAMD64VPMOVUSWB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSWBMasked128) + v.reset(OpAMD64VPMOVUSWBMasked128_128) v.AddArg2(x, mask) return true } @@ -31781,6 +31781,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVWB128_256 x) mask) + // result: (VPMOVWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVWB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVWBMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPMOVWB256 x) mask) // result: (VPMOVWBMasked256 x mask) for { @@ -31793,6 +31805,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVSWB128_256 x) mask) + // result: (VPMOVSWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSWB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSWBMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPMOVSWB256 x) mask) // result: (VPMOVSWBMasked256 x mask) for { @@ -31817,6 +31841,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU16Masked256 (VPMOVUSWB128_256 x) mask) + // result: (VPMOVUSWBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSWB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSWBMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU16Masked256 (VPMOVUSWB256 x) mask) // result: (VPMOVUSWBMasked256 x mask) for { @@ -32863,51 +32899,51 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVDB128 x) mask) - // result: (VPMOVDBMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVDB128_128 x) mask) + // result: (VPMOVDBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVDB128 { + if v_0.Op != OpAMD64VPMOVDB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVDBMasked128) + v.reset(OpAMD64VPMOVDBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVSDB128 x) mask) - // result: (VPMOVSDBMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVSDB128_128 x) mask) + // result: (VPMOVSDBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSDB128 { + if v_0.Op != OpAMD64VPMOVSDB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSDBMasked128) + v.reset(OpAMD64VPMOVSDBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVDW128 x) mask) - // result: (VPMOVDWMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVDW128_128 x) mask) + // result: (VPMOVDWMasked128_128 x mask) for { - if v_0.Op != 
OpAMD64VPMOVDW128 { + if v_0.Op != OpAMD64VPMOVDW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVDWMasked128) + v.reset(OpAMD64VPMOVDWMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVSDW128 x) mask) - // result: (VPMOVSDWMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVSDW128_128 x) mask) + // result: (VPMOVSDWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSDW128 { + if v_0.Op != OpAMD64VPMOVSDW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSDWMasked128) + v.reset(OpAMD64VPMOVSDWMasked128_128) v.AddArg2(x, mask) return true } @@ -32948,27 +32984,27 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVUSDB128 x) mask) - // result: (VPMOVUSDBMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVUSDB128_128 x) mask) + // result: (VPMOVUSDBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSDB128 { + if v_0.Op != OpAMD64VPMOVUSDB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSDBMasked128) + v.reset(OpAMD64VPMOVUSDBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked128 (VPMOVUSDW128 x) mask) - // result: (VPMOVUSDWMasked128 x mask) + // match: (VMOVDQU32Masked128 (VPMOVUSDW128_128 x) mask) + // result: (VPMOVUSDWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSDW128 { + if v_0.Op != OpAMD64VPMOVUSDW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSDWMasked128) + v.reset(OpAMD64VPMOVUSDWMasked128_128) v.AddArg2(x, mask) return true } @@ -33626,6 +33662,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVDB128_256 x) mask) + // result: (VPMOVDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVSDB128_256 x) mask) + // result: (VPMOVSDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked256 (VPMOVDW128_256 x) mask) + // result: (VPMOVDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVDW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDWMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPMOVDW256 x) mask) // result: (VPMOVDWMasked256 x mask) for { @@ -33638,6 +33710,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVSDW128_256 x) mask) + // result: (VPMOVSDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDWMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPMOVSDW256 x) mask) // result: (VPMOVSDWMasked256 x mask) for { @@ -33687,6 +33771,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked256 (VPMOVUSDB128_256 x) mask) + // result: (VPMOVUSDBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_256) + v.AddArg2(x, mask) + return true + } + // 
match: (VMOVDQU32Masked256 (VPMOVUSDW128_256 x) mask) + // result: (VPMOVUSDWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDWMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked256 (VPMOVUSDW256 x) mask) // result: (VPMOVUSDWMasked256 x mask) for { @@ -34405,6 +34513,30 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVDB128_512 x) mask) + // result: (VPMOVDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVDBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMOVSDB128_512 x) mask) + // result: (VPMOVSDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSDBMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) // result: (VPACKSSDWMasked512 x y mask) for { @@ -34442,6 +34574,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU32Masked512 (VPMOVUSDB128_512 x) mask) + // result: (VPMOVUSDBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSDB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSDBMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) // result: (VPACKUSDWMasked512 x y mask) for { @@ -35132,111 +35276,111 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool { v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVQB128 x) mask) - // result: (VPMOVQBMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVQB128_128 x) mask) + // result: (VPMOVQBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVQB128 { + if v_0.Op != OpAMD64VPMOVQB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVQBMasked128) + v.reset(OpAMD64VPMOVQBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVSQB128 x) mask) - // result: (VPMOVSQBMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVSQB128_128 x) mask) + // result: (VPMOVSQBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSQB128 { + if v_0.Op != OpAMD64VPMOVSQB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSQBMasked128) + v.reset(OpAMD64VPMOVSQBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVQW128 x) mask) - // result: (VPMOVQWMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVQW128_128 x) mask) + // result: (VPMOVQWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVQW128 { + if v_0.Op != OpAMD64VPMOVQW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVQWMasked128) + v.reset(OpAMD64VPMOVQWMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVSQW128 x) mask) - // result: (VPMOVSQWMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVSQW128_128 x) mask) + // result: (VPMOVSQWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSQW128 { + if v_0.Op != OpAMD64VPMOVSQW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSQWMasked128) + v.reset(OpAMD64VPMOVSQWMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVQD128 x) mask) - // result: (VPMOVQDMasked128 x mask) + // 
match: (VMOVDQU64Masked128 (VPMOVQD128_128 x) mask) + // result: (VPMOVQDMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVQD128 { + if v_0.Op != OpAMD64VPMOVQD128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVQDMasked128) + v.reset(OpAMD64VPMOVQDMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVSQD128 x) mask) - // result: (VPMOVSQDMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVSQD128_128 x) mask) + // result: (VPMOVSQDMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVSQD128 { + if v_0.Op != OpAMD64VPMOVSQD128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVSQDMasked128) + v.reset(OpAMD64VPMOVSQDMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVUSQB128 x) mask) - // result: (VPMOVUSQBMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVUSQB128_128 x) mask) + // result: (VPMOVUSQBMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSQB128 { + if v_0.Op != OpAMD64VPMOVUSQB128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSQBMasked128) + v.reset(OpAMD64VPMOVUSQBMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVUSQW128 x) mask) - // result: (VPMOVUSQWMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVUSQW128_128 x) mask) + // result: (VPMOVUSQWMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSQW128 { + if v_0.Op != OpAMD64VPMOVUSQW128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSQWMasked128) + v.reset(OpAMD64VPMOVUSQWMasked128_128) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked128 (VPMOVUSQD128 x) mask) - // result: (VPMOVUSQDMasked128 x mask) + // match: (VMOVDQU64Masked128 (VPMOVUSQD128_128 x) mask) + // result: (VPMOVUSQDMasked128_128 x mask) for { - if v_0.Op != OpAMD64VPMOVUSQD128 { + if v_0.Op != OpAMD64VPMOVUSQD128_128 { break } x := v_0.Args[0] mask := v_1 - v.reset(OpAMD64VPMOVUSQDMasked128) + v.reset(OpAMD64VPMOVUSQDMasked128_128) v.AddArg2(x, mask) return true } @@ -35839,6 +35983,66 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVQB128_256 x) mask) + // result: (VPMOVQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVSQB128_256 x) mask) + // result: (VPMOVSQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVQW128_256 x) mask) + // result: (VPMOVQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVSQW128_256 x) mask) + // result: (VPMOVSQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVQD128_256 x) mask) + // result: (VPMOVQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQDMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 
(VPMOVQD256 x) mask) // result: (VPMOVQDMasked256 x mask) for { @@ -35851,6 +36055,18 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVSQD128_256 x) mask) + // result: (VPMOVSQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQDMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPMOVSQD256 x) mask) // result: (VPMOVSQDMasked256 x mask) for { @@ -35863,6 +36079,42 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked256 (VPMOVUSQB128_256 x) mask) + // result: (VPMOVUSQBMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQB128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVUSQW128_256 x) mask) + // result: (VPMOVUSQWMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQW128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_256) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked256 (VPMOVUSQD128_256 x) mask) + // result: (VPMOVUSQDMasked128_256 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQD128_256 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQDMasked128_256) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked256 (VPMOVUSQD256 x) mask) // result: (VPMOVUSQDMasked256 x mask) for { @@ -36526,6 +36778,78 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { v.AddArg2(x, mask) return true } + // match: (VMOVDQU64Masked512 (VPMOVQB128_512 x) mask) + // result: (VPMOVQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVSQB128_512 x) mask) + // result: (VPMOVSQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVQW128_512 x) mask) + // result: (VPMOVQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVQWMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVSQW128_512 x) mask) + // result: (VPMOVSQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSQWMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVUSQB128_512 x) mask) + // result: (VPMOVUSQBMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQB128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQBMasked128_512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMOVUSQW128_512 x) mask) + // result: (VPMOVUSQWMasked128_512 x mask) + for { + if v_0.Op != OpAMD64VPMOVUSQW128_512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVUSQWMasked128_512) + v.AddArg2(x, mask) + return true + } // match: (VMOVDQU64Masked512 (VDIVPD512 x y) mask) // result: (VDIVPDMasked512 x y mask) for { @@ -40279,61 +40603,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v 
*Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) - // result: (VPOPCNTBMasked512Merging dst x mask) + // match: (VPBLENDMBMasked512 dst (VPABSB512 x) mask) + // result: (VPABSBMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTB512 { + if v_1.Op != OpAMD64VPABSB512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPOPCNTBMasked512Merging) + v.reset(OpAMD64VPABSBMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) - // result: (VPSUBSBMasked512Merging dst x y mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBSB512 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBSBMasked512Merging) - v.AddArg4(dst, x, y, mask) - return true - } - // match: (VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) - // result: (VPSUBBMasked512Merging dst x y mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPSUBB512 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPSUBBMasked512Merging) - v.AddArg4(dst, x, y, mask) - return true - } - // match: (VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) - // result: (VPMINSBMasked512Merging dst x y mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMINSB512 { - break - } - y := v_1.Args[1] - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMINSBMasked512Merging) - v.AddArg4(dst, x, y, mask) - return true - } // match: (VPBLENDMBMasked512 dst (VPADDB512 x y) mask) // result: (VPADDBMasked512Merging dst x y mask) for { @@ -40348,17 +40630,17 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) - // result: (VPMAXUBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) + // result: (VPADDSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUB512 { + if v_1.Op != OpAMD64VPADDSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXUBMasked512Merging) + v.reset(OpAMD64VPADDSBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -40390,48 +40672,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) - // result: (VPMINUBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) + // result: (VPMAXSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUB512 { + if v_1.Op != OpAMD64VPMAXSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINUBMasked512Merging) + v.reset(OpAMD64VPMAXSBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPMAXSB512 x y) mask) - // result: (VPMAXSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPMAXUB512 x y) mask) + // result: (VPMAXUBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSB512 { + if v_1.Op != OpAMD64VPMAXUB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXSBMasked512Merging) + v.reset(OpAMD64VPMAXUBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) - // result: (VPSUBUSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPMINSB512 x y) mask) + // result: (VPMINSBMasked512Merging dst x y mask) for { dst := v_0 
- if v_1.Op != OpAMD64VPSUBUSB512 { + if v_1.Op != OpAMD64VPMINSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBUSBMasked512Merging) + v.reset(OpAMD64VPMINSBMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMBMasked512 dst (VPMINUB512 x y) mask) + // result: (VPMINUBMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPMINUB512 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPMINUBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } + // match: (VPBLENDMBMasked512 dst (VPOPCNTB512 x) mask) + // result: (VPOPCNTBMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPOPCNTB512 { + break + } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPOPCNTBMasked512Merging) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMBMasked512 dst (VPSHUFB512 x y) mask) // result: (VPSHUFBMasked512Merging dst x y mask) for { @@ -40446,30 +40755,45 @@ func rewriteValueAMD64_OpAMD64VPBLENDMBMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPABSB512 x) mask) - // result: (VPABSBMasked512Merging dst x mask) + // match: (VPBLENDMBMasked512 dst (VPSUBB512 x y) mask) + // result: (VPSUBBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPABSB512 { + if v_1.Op != OpAMD64VPSUBB512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPABSBMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSUBBMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMBMasked512 dst (VPADDSB512 x y) mask) - // result: (VPADDSBMasked512Merging dst x y mask) + // match: (VPBLENDMBMasked512 dst (VPSUBSB512 x y) mask) + // result: (VPSUBSBMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSB512 { + if v_1.Op != OpAMD64VPSUBSB512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDSBMasked512Merging) + v.reset(OpAMD64VPSUBSBMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMBMasked512 dst (VPSUBUSB512 x y) mask) + // result: (VPSUBUSBMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSUBUSB512 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSUBUSBMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -40479,547 +40803,547 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) - // result: (VPMOVSDWMasked256Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VADDPS512 x y) mask) + // result: (VADDPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSDW256 { + if v_1.Op != OpAMD64VADDPS512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSDWMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VADDPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) - // result: (VPLZCNTDMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) + // result: (VCVTPS2UDQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD512 { + if v_1.Op != OpAMD64VCVTPS2UDQ512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPLZCNTDMasked512Merging) + v.reset(OpAMD64VCVTPS2UDQMasked512Merging) v.AddArg3(dst, x, mask) 
return true } - // match: (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) - // result: (VPMULLDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) + // result: (VCVTTPS2DQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLD512 { + if v_1.Op != OpAMD64VCVTTPS2DQ512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMULLDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VCVTTPS2DQMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) - // result: (VMAXPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) + // result: (VDIVPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPS512 { + if v_1.Op != OpAMD64VDIVPS512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMAXPSMasked512Merging) + v.reset(OpAMD64VDIVPSMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128 x) mask) - // result: (VPMOVUSDBMasked128Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VMAXPS512 x y) mask) + // result: (VMAXPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDB128 { + if v_1.Op != OpAMD64VMAXPS512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSDBMasked128Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMAXPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) - // result: (VRSQRT14PSMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VMINPS512 x y) mask) + // result: (VMINPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VRSQRT14PS512 { + if v_1.Op != OpAMD64VMINPS512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRSQRT14PSMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMINPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) - // result: (VPMOVDWMasked256Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VMULPS512 x y) mask) + // result: (VMULPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVDW256 { + if v_1.Op != OpAMD64VMULPS512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVDWMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMULPSMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) - // result: (VRCP14PSMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPABSD512 x) mask) + // result: (VPABSDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VRCP14PS512 { + if v_1.Op != OpAMD64VPABSD512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRCP14PSMasked512Merging) + v.reset(OpAMD64VPABSDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) - // result: (VREDUCEPSMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) + // result: (VPACKSSDWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPS512 { + if v_1.Op != OpAMD64VPACKSSDW512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VREDUCEPSMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + 
v.reset(OpAMD64VPACKSSDWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VDIVPS512 x y) mask) - // result: (VDIVPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) + // result: (VPACKUSDWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPS512 { + if v_1.Op != OpAMD64VPACKUSDW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VDIVPSMasked512Merging) + v.reset(OpAMD64VPACKUSDWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) - // result: (VPSRLVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPADDD512 x y) mask) + // result: (VPADDDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVD512 { + if v_1.Op != OpAMD64VPADDD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLVDMasked512Merging) + v.reset(OpAMD64VPADDDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) - // result: (VPSUBDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPANDD512 x y) mask) + // result: (VPANDDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBD512 { + if v_1.Op != OpAMD64VPANDD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBDMasked512Merging) + v.reset(OpAMD64VPANDDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) - // result: (VPROLDMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPLZCNTD512 x) mask) + // result: (VPLZCNTDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLD512 { + if v_1.Op != OpAMD64VPLZCNTD512 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPROLDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPLZCNTDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPORD512 x y) mask) - // result: (VPORDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) + // result: (VPMAXSDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPORD512 { + if v_1.Op != OpAMD64VPMAXSD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPORDMasked512Merging) + v.reset(OpAMD64VPMAXSDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) - // result: (VPSHLDDMasked512Merging dst [a] x y mask) + // match: (VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) + // result: (VPMAXUDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDD512 { + if v_1.Op != OpAMD64VPMAXUD512 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMAXUDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPACKUSDW512 x y) mask) - // result: (VPACKUSDWMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) + // result: (VPMINSDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPACKUSDW512 { + if v_1.Op != OpAMD64VPMINSD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPACKUSDWMasked512Merging) + 
v.reset(OpAMD64VPMINSDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMAXSD512 x y) mask) - // result: (VPMAXSDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) + // result: (VPMINUDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSD512 { + if v_1.Op != OpAMD64VPMINUD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXSDMasked512Merging) + v.reset(OpAMD64VPMINUDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VADDPS512 x y) mask) - // result: (VADDPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMOVDB128_512 x) mask) + // result: (VPMOVDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VADDPS512 { + if v_1.Op != OpAMD64VPMOVDB128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VADDPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVDBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) - // result: (VPMOVUSDWMasked256Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPMOVDW256 x) mask) + // result: (VPMOVDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSDW256 { + if v_1.Op != OpAMD64VPMOVDW256 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSDWMasked256Merging) + v.reset(OpAMD64VPMOVDWMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVSDB128 x) mask) - // result: (VPMOVSDBMasked128Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPMOVSDB128_512 x) mask) + // result: (VPMOVSDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSDB128 { + if v_1.Op != OpAMD64VPMOVSDB128_512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSDBMasked128Merging) + v.reset(OpAMD64VPMOVSDBMasked128_512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) - // result: (VSUBPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMOVSDW256 x) mask) + // result: (VPMOVSDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPS512 { + if v_1.Op != OpAMD64VPMOVSDW256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VSUBPSMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVSDWMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMAXUD512 x y) mask) - // result: (VPMAXUDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMOVUSDB128_512 x) mask) + // result: (VPMOVUSDBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUD512 { + if v_1.Op != OpAMD64VPMOVUSDB128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXUDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVUSDBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) - // result: (VPRORDMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPMOVUSDW256 x) mask) + // result: (VPMOVUSDWMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORD512 { + if v_1.Op != OpAMD64VPMOVUSDW256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPRORDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + 
v.reset(OpAMD64VPMOVUSDWMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) - // result: (VPROLVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPMULLD512 x y) mask) + // result: (VPMULLDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVD512 { + if v_1.Op != OpAMD64VPMULLD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPROLVDMasked512Merging) + v.reset(OpAMD64VPMULLDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VCVTTPS2DQ512 x) mask) - // result: (VCVTTPS2DQMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) + // result: (VPOPCNTDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VCVTTPS2DQ512 { + if v_1.Op != OpAMD64VPOPCNTD512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VCVTTPS2DQMasked512Merging) + v.reset(OpAMD64VPOPCNTDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPACKSSDW512 x y) mask) - // result: (VPACKSSDWMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPORD512 x y) mask) + // result: (VPORDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW512 { + if v_1.Op != OpAMD64VPORD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPACKSSDWMasked512Merging) + v.reset(OpAMD64VPORDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) - // result: (VPRORVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPROLD512 [a] x) mask) + // result: (VPROLDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVD512 { + if v_1.Op != OpAMD64VPROLD512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPRORVDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPROLDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPADDD512 x y) mask) - // result: (VPADDDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPROLVD512 x y) mask) + // result: (VPROLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDD512 { + if v_1.Op != OpAMD64VPROLVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDDMasked512Merging) + v.reset(OpAMD64VPROLVDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) - // result: (VRNDSCALEPSMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPRORD512 [a] x) mask) + // result: (VPRORDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPS512 { + if v_1.Op != OpAMD64VPRORD512 { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRNDSCALEPSMasked512Merging) + v.reset(OpAMD64VPRORDMasked512Merging) v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VCVTPS2UDQ512 x) mask) - // result: (VCVTPS2UDQMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPRORVD512 x y) mask) + // result: (VPRORVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VCVTPS2UDQ512 { + if v_1.Op != OpAMD64VPRORVD512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - 
v.reset(OpAMD64VCVTPS2UDQMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPRORVDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) - // result: (VPSHRDDMasked512Merging dst [a] x y mask) + // match: (VPBLENDMDMasked512 dst (VPSHLDD512 [a] x y) mask) + // result: (VPSHLDDMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDD512 { + if v_1.Op != OpAMD64VPSHLDD512 { break } a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHRDDMasked512Merging) + v.reset(OpAMD64VPSHLDDMasked512Merging) v.AuxInt = uint8ToAuxInt(a) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPOPCNTD512 x) mask) - // result: (VPOPCNTDMasked512Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPSHRDD512 [a] x y) mask) + // result: (VPSHRDDMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTD512 { + if v_1.Op != OpAMD64VPSHRDD512 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPOPCNTDMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSHRDDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMOVDB128 x) mask) - // result: (VPMOVDBMasked128Merging dst x mask) + // match: (VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) + // result: (VPSHUFDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVDB128 { + if v_1.Op != OpAMD64VPSHUFD512 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVDBMasked128Merging) + v.reset(OpAMD64VPSHUFDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) - // result: (VPSRADMasked512constMerging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) + // result: (VPSLLDMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAD512const { + if v_1.Op != OpAMD64VPSLLD512const { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRADMasked512constMerging) + v.reset(OpAMD64VPSLLDMasked512constMerging) v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VMINPS512 x y) mask) - // result: (VMINPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) + // result: (VPSLLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VMINPS512 { + if v_1.Op != OpAMD64VPSLLVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMINPSMasked512Merging) + v.reset(OpAMD64VPSLLVDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPANDD512 x y) mask) - // result: (VPANDDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) + // result: (VPSRADMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPANDD512 { + if v_1.Op != OpAMD64VPSRAD512const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPANDDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPSRADMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSHUFD512 [a] x) mask) - // 
result: (VPSHUFDMasked512Merging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) + // result: (VPSRAVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFD512 { + if v_1.Op != OpAMD64VPSRAVD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHUFDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSRAVDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMINSD512 x y) mask) - // result: (VPMINSDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) + // result: (VPSRLVDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSD512 { + if v_1.Op != OpAMD64VPSRLVD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINSDMasked512Merging) + v.reset(OpAMD64VPSRLVDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) - // result: (VPSRAVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) + // result: (VPSUBDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVD512 { + if v_1.Op != OpAMD64VPSUBD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAVDMasked512Merging) + v.reset(OpAMD64VPSUBDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -41037,47 +41361,60 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) - // result: (VPSLLVDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VRCP14PS512 x) mask) + // result: (VRCP14PSMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVD512 { + if v_1.Op != OpAMD64VRCP14PS512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLVDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VRCP14PSMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPSLLD512const [a] x) mask) - // result: (VPSLLDMasked512constMerging dst [a] x mask) + // match: (VPBLENDMDMasked512 dst (VREDUCEPS512 [a] x) mask) + // result: (VREDUCEPSMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLD512const { + if v_1.Op != OpAMD64VREDUCEPS512 { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked512constMerging) + v.reset(OpAMD64VREDUCEPSMasked512Merging) v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPMINUD512 x y) mask) - // result: (VPMINUDMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VRNDSCALEPS512 [a] x) mask) + // result: (VRNDSCALEPSMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUD512 { + if v_1.Op != OpAMD64VRNDSCALEPS512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINUDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VRNDSCALEPSMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMDMasked512 dst (VRSQRT14PS512 x) mask) + // result: (VRSQRT14PSMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VRSQRT14PS512 { + break + } + x := v_1.Args[0] + mask := v_2 + 
v.reset(OpAMD64VRSQRT14PSMasked512Merging) + v.AddArg3(dst, x, mask) return true } // match: (VPBLENDMDMasked512 dst (VSCALEFPS512 x y) mask) @@ -41107,30 +41444,17 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMDMasked512 dst (VPABSD512 x) mask) - // result: (VPABSDMasked512Merging dst x mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPABSD512 { - break - } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPABSDMasked512Merging) - v.AddArg3(dst, x, mask) - return true - } - // match: (VPBLENDMDMasked512 dst (VMULPS512 x y) mask) - // result: (VMULPSMasked512Merging dst x y mask) + // match: (VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) + // result: (VSUBPSMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VMULPS512 { + if v_1.Op != OpAMD64VSUBPS512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMULPSMasked512Merging) + v.reset(OpAMD64VSUBPSMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -41163,104 +41487,74 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) - // result: (VPSLLQMasked512constMerging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VADDPD512 x y) mask) + // result: (VADDPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLQ512const { + if v_1.Op != OpAMD64VADDPD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VADDPDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) - // result: (VPSUBQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) + // result: (VDIVPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBQ512 { + if v_1.Op != OpAMD64VDIVPD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBQMasked512Merging) + v.reset(OpAMD64VDIVPDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) - // result: (VPROLQMasked512Merging dst [a] x mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPROLQ512 { - break - } - a := auxIntToUint8(v_1.AuxInt) - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPROLQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) - return true - } - // match: (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) - // result: (VPSLLVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) + // result: (VMAXPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVQ512 { + if v_1.Op != OpAMD64VMAXPD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLVQMasked512Merging) + v.reset(OpAMD64VMAXPDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128 x) mask) - // result: (VPMOVUSQBMasked128Merging dst x mask) - for { - dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQB128 { - break - } - x := v_1.Args[0] - mask := v_2 - v.reset(OpAMD64VPMOVUSQBMasked128Merging) - v.AddArg3(dst, x, mask) - return true - } - // match: (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) - // result: (VPADDQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst 
(VMINPD512 x y) mask) + // result: (VMINPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDQ512 { + if v_1.Op != OpAMD64VMINPD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDQMasked512Merging) + v.reset(OpAMD64VMINPDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) - // result: (VRNDSCALEPDMasked512Merging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VMULPD512 x y) mask) + // result: (VMULPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPD512 { + if v_1.Op != OpAMD64VMULPD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRNDSCALEPDMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VMULPDMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } // match: (VPBLENDMQMasked512 dst (VPABSQ512 x) mask) @@ -41276,226 +41570,218 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) - // result: (VPMOVUSQDMasked256Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPADDQ512 x y) mask) + // result: (VPADDQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQD256 { + if v_1.Op != OpAMD64VPADDQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSQDMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPADDQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VADDPD512 x y) mask) - // result: (VADDPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) + // result: (VPANDQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VADDPD512 { + if v_1.Op != OpAMD64VPANDQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VADDPDMasked512Merging) + v.reset(OpAMD64VPANDQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) - // result: (VRCP14PDMasked512Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) + // result: (VPLZCNTQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VRCP14PD512 { + if v_1.Op != OpAMD64VPLZCNTQ512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VRCP14PDMasked512Merging) + v.reset(OpAMD64VPLZCNTQMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) - // result: (VPSRLVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) + // result: (VPMAXSQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVQ512 { + if v_1.Op != OpAMD64VPMAXSQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLVQMasked512Merging) + v.reset(OpAMD64VPMAXSQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) - // result: (VPRORVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) + // result: (VPMAXUQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVQ512 { + if v_1.Op != OpAMD64VPMAXUQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPRORVQMasked512Merging) + v.reset(OpAMD64VPMAXUQMasked512Merging) v.AddArg4(dst, x, y, mask) return 
true } - // match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) - // result: (VPSRAVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) + // result: (VPMINSQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ512 { + if v_1.Op != OpAMD64VPMINSQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAVQMasked512Merging) + v.reset(OpAMD64VPMINSQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPANDQ512 x y) mask) - // result: (VPANDQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) + // result: (VPMINUQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPANDQ512 { + if v_1.Op != OpAMD64VPMINUQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPANDQMasked512Merging) + v.reset(OpAMD64VPMINUQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQB128 x) mask) - // result: (VPMOVQBMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPMOVQB128_512 x) mask) + // result: (VPMOVQBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVQB128 { + if v_1.Op != OpAMD64VPMOVQB128_512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVQBMasked128Merging) + v.reset(OpAMD64VPMOVQBMasked128_512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) - // result: (VPSHLDQMasked512Merging dst [a] x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) + // result: (VPMOVQDMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDQ512 { + if v_1.Op != OpAMD64VPMOVQD256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVQDMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VDIVPD512 x y) mask) - // result: (VDIVPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVQW128_512 x) mask) + // result: (VPMOVQWMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPD512 { + if v_1.Op != OpAMD64VPMOVQW128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VDIVPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVQWMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) - // result: (VPROLVQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVSQB128_512 x) mask) + // result: (VPMOVSQBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPROLVQ512 { + if v_1.Op != OpAMD64VPMOVSQB128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPROLVQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVSQBMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) - // result: (VPRORQMasked512Merging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) + // result: (VPMOVSQDMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPRORQ512 { + if v_1.Op != OpAMD64VPMOVSQD256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPRORQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + 
v.reset(OpAMD64VPMOVSQDMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMINSQ512 x y) mask) - // result: (VPMINSQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVSQW128_512 x) mask) + // result: (VPMOVSQWMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSQ512 { + if v_1.Op != OpAMD64VPMOVSQW128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINSQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVSQWMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) - // result: (VSQRTPDMasked512Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQB128_512 x) mask) + // result: (VPMOVUSQBMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPD512 { + if v_1.Op != OpAMD64VPMOVUSQB128_512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VSQRTPDMasked512Merging) + v.reset(OpAMD64VPMOVUSQBMasked128_512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQD256 x) mask) - // result: (VPMOVSQDMasked256Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQD256 x) mask) + // result: (VPMOVUSQDMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSQD256 { + if v_1.Op != OpAMD64VPMOVUSQD256 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSQDMasked256Merging) + v.reset(OpAMD64VPMOVUSQDMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMINPD512 x y) mask) - // result: (VMINPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPMOVUSQW128_512 x) mask) + // result: (VPMOVUSQWMasked128_512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VMINPD512 { + if v_1.Op != OpAMD64VPMOVUSQW128_512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMINPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVUSQWMasked128_512Merging) + v.AddArg3(dst, x, mask) return true } // match: (VPBLENDMQMasked512 dst (VPMULLQ512 x y) mask) @@ -41512,237 +41798,263 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMAXPD512 x y) mask) - // result: (VMAXPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) + // result: (VPOPCNTQMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPD512 { + if v_1.Op != OpAMD64VPOPCNTQ512 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMAXPDMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPOPCNTQMasked512Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VMULPD512 x y) mask) - // result: (VMULPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPORQ512 x y) mask) + // result: (VPORQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VMULPD512 { + if v_1.Op != OpAMD64VPORQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VMULPDMasked512Merging) + v.reset(OpAMD64VPORQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPORQ512 x y) mask) - // result: (VPORQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPROLQ512 [a] x) mask) + // result: (VPROLQMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPORQ512 { 
+ if v_1.Op != OpAMD64VPROLQ512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPORQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPROLQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVUSQW128 x) mask) - // result: (VPMOVUSQWMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPROLVQ512 x y) mask) + // result: (VPROLVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSQW128 { + if v_1.Op != OpAMD64VPROLVQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSQWMasked128Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPROLVQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) - // result: (VREDUCEPDMasked512Merging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VPRORQ512 [a] x) mask) + // result: (VPRORQMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPD512 { + if v_1.Op != OpAMD64VPRORQ512 { break } a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VREDUCEPDMasked512Merging) + v.reset(OpAMD64VPRORQMasked512Merging) v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPOPCNTQ512 x) mask) - // result: (VPOPCNTQMasked512Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPRORVQ512 x y) mask) + // result: (VPRORVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ512 { + if v_1.Op != OpAMD64VPRORVQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPOPCNTQMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPRORVQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) - // result: (VPXORQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPSHLDQ512 [a] x y) mask) + // result: (VPSHLDQMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPXORQ512 { + if v_1.Op != OpAMD64VPSHLDQ512 { break } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPXORQMasked512Merging) + v.reset(OpAMD64VPSHLDQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQD256 x) mask) - // result: (VPMOVQDMasked256Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) + // result: (VPSHRDQMasked512Merging dst [a] x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVQD256 { + if v_1.Op != OpAMD64VPSHRDQ512 { break } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVQDMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSHRDQMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMAXUQ512 x y) mask) - // result: (VPMAXUQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPSLLQ512const [a] x) mask) + // result: (VPSLLQMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUQ512 { + if v_1.Op != OpAMD64VPSLLQ512const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXUQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPSLLQMasked512constMerging) + 
v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) - // result: (VSUBPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) + // result: (VPSLLVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPD512 { + if v_1.Op != OpAMD64VPSLLVQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VSUBPDMasked512Merging) + v.reset(OpAMD64VPSLLVQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVQW128 x) mask) - // result: (VPMOVQWMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) + // result: (VPSRAQMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVQW128 { + if v_1.Op != OpAMD64VPSRAQ512const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVQWMasked128Merging) + v.reset(OpAMD64VPSRAQMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSHRDQ512 [a] x y) mask) - // result: (VPSHRDQMasked512Merging dst [a] x y mask) + // match: (VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) + // result: (VPSRAVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDQ512 { + if v_1.Op != OpAMD64VPSRAVQ512 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHRDQMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPSRAVQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPLZCNTQ512 x) mask) - // result: (VPLZCNTQMasked512Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) + // result: (VPSRLVQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTQ512 { + if v_1.Op != OpAMD64VPSRLVQ512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPLZCNTQMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSRLVQMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) - // result: (VSCALEFPDMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) + // result: (VPSUBQMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VSCALEFPD512 { + if v_1.Op != OpAMD64VPSUBQ512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VSCALEFPDMasked512Merging) + v.reset(OpAMD64VPSUBQMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) + // result: (VPXORQMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPXORQ512 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPXORQMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQW128 x) mask) - // result: (VPMOVSQWMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VRCP14PD512 x) mask) + // result: (VRCP14PDMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSQW128 { + if v_1.Op != OpAMD64VRCP14PD512 { break } x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSQWMasked128Merging) + v.reset(OpAMD64VRCP14PDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMINUQ512 x y) mask) - // result: (VPMINUQMasked512Merging dst x 
y mask) + // match: (VPBLENDMQMasked512 dst (VREDUCEPD512 [a] x) mask) + // result: (VREDUCEPDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUQ512 { + if v_1.Op != OpAMD64VREDUCEPD512 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINUQMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VREDUCEPDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMOVSQB128 x) mask) - // result: (VPMOVSQBMasked128Merging dst x mask) + // match: (VPBLENDMQMasked512 dst (VRNDSCALEPD512 [a] x) mask) + // result: (VRNDSCALEPDMasked512Merging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSQB128 { + if v_1.Op != OpAMD64VRNDSCALEPD512 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSQBMasked128Merging) + v.reset(OpAMD64VRNDSCALEPDMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } @@ -41759,32 +42071,44 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) - // result: (VPSRAQMasked512constMerging dst [a] x mask) + // match: (VPBLENDMQMasked512 dst (VSCALEFPD512 x y) mask) + // result: (VSCALEFPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAQ512const { + if v_1.Op != OpAMD64VSCALEFPD512 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAQMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VSCALEFPDMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMQMasked512 dst (VSQRTPD512 x) mask) + // result: (VSQRTPDMasked512Merging dst x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VSQRTPD512 { + break + } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VSQRTPDMasked512Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMQMasked512 dst (VPMAXSQ512 x y) mask) - // result: (VPMAXSQMasked512Merging dst x y mask) + // match: (VPBLENDMQMasked512 dst (VSUBPD512 x y) mask) + // result: (VSUBPDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSQ512 { + if v_1.Op != OpAMD64VSUBPD512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXSQMasked512Merging) + v.reset(OpAMD64VSUBPDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } @@ -41817,45 +42141,73 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) - // result: (VPMAXSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPABSW512 x) mask) + // result: (VPABSWMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSW512 { + if v_1.Op != OpAMD64VPABSW512 { + break + } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPABSWMasked512Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) + // result: (VPADDSWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPADDSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXSWMasked512Merging) + v.reset(OpAMD64VPADDSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) - // result: (VPMULHWMasked512Merging dst x 
y mask) + // match: (VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) + // result: (VPADDUSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMULHW512 { + if v_1.Op != OpAMD64VPADDUSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMULHWMasked512Merging) + v.reset(OpAMD64VPADDUSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) - // result: (VPMOVWBMasked256Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPADDW512 x y) mask) + // result: (VPADDWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVWB256 { + if v_1.Op != OpAMD64VPADDW512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVWBMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPADDWMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) + // result: (VPAVGWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPAVGW512 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPAVGWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } // match: (VPBLENDMWMasked512 dst (VPMADDUBSW512 x y) mask) @@ -41872,47 +42224,46 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) - // result: (VPSHLDWMasked512Merging dst [a] x y mask) + // match: (VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) + // result: (VPMADDWDMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDW512 { + if v_1.Op != OpAMD64VPMADDWD512 { break } - a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMADDWDMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) - // result: (VPMULHUWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMAXSW512 x y) mask) + // result: (VPMAXSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMULHUW512 { + if v_1.Op != OpAMD64VPMAXSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMULHUWMasked512Merging) + v.reset(OpAMD64VPMAXSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) - // result: (VPMOVUSWBMasked256Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) + // result: (VPMAXUWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB256 { + if v_1.Op != OpAMD64VPMAXUW512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVUSWBMasked256Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPMAXUWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } // match: (VPBLENDMWMasked512 dst (VPMINSW512 x y) mask) @@ -41929,121 +42280,161 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) - // result: (VPSRAVWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) + // result: (VPMINUWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVW512 { + if v_1.Op != OpAMD64VPMINUW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - 
v.reset(OpAMD64VPSRAVWMasked512Merging) + v.reset(OpAMD64VPMINUWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPADDW512 x y) mask) - // result: (VPADDWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) + // result: (VPMOVSWBMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDW512 { + if v_1.Op != OpAMD64VPMOVSWB256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDWMasked512Merging) - v.AddArg4(dst, x, y, mask) + v.reset(OpAMD64VPMOVSWBMasked256Merging) + v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) - // result: (VPSHUFHWMasked512Merging dst [a] x mask) + // match: (VPBLENDMWMasked512 dst (VPMOVUSWB256 x) mask) + // result: (VPMOVUSWBMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFHW512 { + if v_1.Op != OpAMD64VPMOVUSWB256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHUFHWMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPMOVUSWBMasked256Merging) v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) - // result: (VPSHRDWMasked512Merging dst [a] x y mask) + // match: (VPBLENDMWMasked512 dst (VPMOVWB256 x) mask) + // result: (VPMOVWBMasked256Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDW512 { + if v_1.Op != OpAMD64VPMOVWB256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512Merging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg4(dst, x, y, mask) - return true + v.reset(OpAMD64VPMOVWBMasked256Merging) + v.AddArg3(dst, x, mask) + return true } - // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) - // result: (VPSUBSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMULHUW512 x y) mask) + // result: (VPMULHUWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSW512 { + if v_1.Op != OpAMD64VPMULHUW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBSWMasked512Merging) + v.reset(OpAMD64VPMULHUWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) - // result: (VPSUBUSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMULHW512 x y) mask) + // result: (VPMULHWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSW512 { + if v_1.Op != OpAMD64VPMULHW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBUSWMasked512Merging) + v.reset(OpAMD64VPMULHWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) - // result: (VPSUBWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) + // result: (VPMULLWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBW512 { + if v_1.Op != OpAMD64VPMULLW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSUBWMasked512Merging) + v.reset(OpAMD64VPMULLWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMADDWD512 x y) mask) - // result: (VPMADDWDMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) + // result: (VPOPCNTWMasked512Merging dst x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMADDWD512 { + if v_1.Op != 
OpAMD64VPOPCNTW512 { break } + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPOPCNTWMasked512Merging) + v.AddArg3(dst, x, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSHLDW512 [a] x y) mask) + // result: (VPSHLDWMasked512Merging dst [a] x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSHLDW512 { + break + } + a := auxIntToUint8(v_1.AuxInt) y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMADDWDMasked512Merging) + v.reset(OpAMD64VPSHLDWMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg4(dst, x, y, mask) + return true + } + // match: (VPBLENDMWMasked512 dst (VPSHRDW512 [a] x y) mask) + // result: (VPSHRDWMasked512Merging dst [a] x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSHRDW512 { + break + } + a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg4(dst, x, y, mask) return true } + // match: (VPBLENDMWMasked512 dst (VPSHUFHW512 [a] x) mask) + // result: (VPSHUFHWMasked512Merging dst [a] x mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSHUFHW512 { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSHUFHWMasked512Merging) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(dst, x, mask) + return true + } // match: (VPBLENDMWMasked512 dst (VPSLLVW512 x y) mask) // result: (VPSLLVWMasked512Merging dst x y mask) for { @@ -42058,19 +42449,35 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPABSW512 x) mask) - // result: (VPABSWMasked512Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) + // result: (VPSLLWMasked512constMerging dst [a] x mask) for { dst := v_0 - if v_1.Op != OpAMD64VPABSW512 { + if v_1.Op != OpAMD64VPSLLW512const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPABSWMasked512Merging) + v.reset(OpAMD64VPSLLWMasked512constMerging) + v.AuxInt = uint8ToAuxInt(a) v.AddArg3(dst, x, mask) return true } + // match: (VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) + // result: (VPSRAVWMasked512Merging dst x y mask) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRAVW512 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + v.reset(OpAMD64VPSRAVWMasked512Merging) + v.AddArg4(dst, x, y, mask) + return true + } // match: (VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) // result: (VPSRAWMasked512constMerging dst [a] x mask) for { @@ -42086,158 +42493,584 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool { v.AddArg3(dst, x, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPADDUSW512 x y) mask) - // result: (VPADDUSWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) + // result: (VPSRLVWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPADDUSW512 { + if v_1.Op != OpAMD64VPSRLVW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDUSWMasked512Merging) + v.reset(OpAMD64VPSRLVWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPOPCNTW512 x) mask) - // result: (VPOPCNTWMasked512Merging dst x mask) + // match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) + // result: (VPSUBSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW512 { + if v_1.Op != OpAMD64VPSUBSW512 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - 
v.reset(OpAMD64VPOPCNTWMasked512Merging) - v.AddArg3(dst, x, mask) + v.reset(OpAMD64VPSUBSWMasked512Merging) + v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMINUW512 x y) mask) - // result: (VPMINUWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) + // result: (VPSUBUSWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUW512 { + if v_1.Op != OpAMD64VPSUBUSW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMINUWMasked512Merging) + v.reset(OpAMD64VPSUBUSWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPAVGW512 x y) mask) - // result: (VPAVGWMasked512Merging dst x y mask) + // match: (VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) + // result: (VPSUBWMasked512Merging dst x y mask) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGW512 { + if v_1.Op != OpAMD64VPSUBW512 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPAVGWMasked512Merging) + v.reset(OpAMD64VPSUBWMasked512Merging) v.AddArg4(dst, x, y, mask) return true } - // match: (VPBLENDMWMasked512 dst (VPMOVSWB256 x) mask) - // result: (VPMOVSWBMasked256Merging dst x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (VPBLENDVB128 dst (VADDPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSWB256 { + if v_1.Op != OpAMD64VADDPD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMOVSWBMasked256Merging) - v.AddArg3(dst, x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VADDPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDMWMasked512 dst (VPMAXUW512 x y) mask) - // result: (VPMAXUWMasked512Merging dst x y mask) + // match: (VPBLENDVB128 dst (VADDPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUW512 { + if v_1.Op != OpAMD64VADDPS128 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMAXUWMasked512Merging) - v.AddArg4(dst, x, y, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VADDPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) - // result: (VPSRLVWMasked512Merging dst x y mask) + // match: (VPBLENDVB128 dst (VBROADCASTSD256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVW512 { + if v_1.Op != OpAMD64VBROADCASTSD256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VBROADCASTSDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VBROADCASTSD512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSDMasked512Merging dst x 
(VPMOVVec64x2ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSD512 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VBROADCASTSDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VBROADCASTSS128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSS128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VBROADCASTSSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VBROADCASTSS256 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSS256 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VBROADCASTSSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VBROADCASTSS512 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VBROADCASTSS512 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VBROADCASTSSMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTPS2UDQ128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VCVTPS2UDQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VCVTTPS2DQ128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VCVTTPS2DQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VDIVPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VDIVPD128 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSRLVWMasked512Merging) - v.AddArg4(dst, x, y, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VDIVPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) 
return true } - // match: (VPBLENDMWMasked512 dst (VPSLLW512const [a] x) mask) - // result: (VPSLLWMasked512constMerging dst [a] x mask) + // match: (VPBLENDVB128 dst (VDIVPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLW512const { + if v_1.Op != OpAMD64VDIVPS128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLWMasked512constMerging) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(dst, x, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VDIVPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDMWMasked512 dst (VPADDSW512 x y) mask) - // result: (VPADDSWMasked512Merging dst x y mask) + // match: (VPBLENDVB128 dst (VGF2P8MULB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSW512 { + if v_1.Op != OpAMD64VGF2P8MULB128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VGF2P8MULBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VMAXPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VMAXPD128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMAXPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VMAXPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VMAXPS128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMAXPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VMINPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VMINPD128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMINPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VMINPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VMINPS128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMINPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, 
types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VMULPD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VMULPD128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMULPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VMULPS128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VMULPS128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VMULPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VPABSB128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPABSB128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPABSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPABSD128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPABSD128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPABSDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPABSQ128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPABSQ128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPABSQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPABSW128 x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPABSW128 { + break + } + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPABSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPACKSSDW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPACKSSDW128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPACKSSDWMasked128Merging) + 
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VPACKUSDW128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPACKUSDW128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPACKUSDWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VPADDB128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPADDB128 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPADDSWMasked512Merging) - v.AddArg4(dst, x, y, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDMWMasked512 dst (VPMULLW512 x y) mask) - // result: (VPMULLWMasked512Merging dst x y mask) + // match: (VPBLENDVB128 dst (VPADDD128 x y) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLW512 { + if v_1.Op != OpAMD64VPADDD128 { break } y := v_1.Args[1] x := v_1.Args[0] mask := v_2 - v.reset(OpAMD64VPMULLWMasked512Merging) - v.AddArg4(dst, x, y, mask) + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPADDDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (VPBLENDVB128 dst (VPMINUD128 x y) mask) + // match: (VPBLENDVB128 dst (VPADDQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINUD128 { + if v_1.Op != OpAMD64VPADDQ128 { break } y := v_1.Args[1] @@ -42246,39 +43079,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINUDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPADDQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPROLQ128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPADDSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) + // result: (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLQ128 { + if v_1.Op != OpAMD64VPADDSB128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLQMasked128Merging) - v.AuxInt = 
uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPADDSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) + // match: (VPBLENDVB128 dst (VPADDSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // result: (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMADDUBSW128 { + if v_1.Op != OpAMD64VPADDSW128 { break } y := v_1.Args[1] @@ -42287,18 +43119,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMADDUBSWMasked128Merging) + v.reset(OpAMD64VPADDSWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXSB128 x y) mask) + // match: (VPBLENDVB128 dst (VPADDUSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // result: (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSB128 { + if v_1.Op != OpAMD64VPADDUSB128 { break } y := v_1.Args[1] @@ -42307,18 +43139,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSBMasked128Merging) + v.reset(OpAMD64VPADDUSBMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPADDSB128 x y) mask) + // match: (VPBLENDVB128 dst (VPADDUSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // result: (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDSB128 { + if v_1.Op != OpAMD64VPADDUSW128 { break } y := v_1.Args[1] @@ -42327,56 +43159,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPADDUSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSS256 x) mask) + // match: (VPBLENDVB128 dst (VPADDW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSSMasked256Merging dst x (VPMOVVec32x4ToM mask)) + // result: (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSS256 { + if v_1.Op != OpAMD64VPADDW128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VBROADCASTSSMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPADDWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBW128 x) mask) + // match: (VPBLENDVB128 dst (VPAVGB128 x y) mask) // 
cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBW128 { + if v_1.Op != OpAMD64VPAVGB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBWMasked128Merging) + v.reset(OpAMD64VPAVGBMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMINSQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPAVGW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // result: (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSQ128 { + if v_1.Op != OpAMD64VPAVGW128 { break } y := v_1.Args[1] @@ -42385,38 +43219,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPAVGWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBUSW128 x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTB128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // result: (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBUSW128 { + if v_1.Op != OpAMD64VPBROADCASTB128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBUSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBQ512 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTB256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBQ512 { + if v_1.Op != OpAMD64VPBROADCASTB256 { break } x := v_1.Args[0] @@ -42424,18 +43257,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBQMasked512Merging) + v.reset(OpAMD64VPBROADCASTBMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWQ256 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTB512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM mask)) + // result: (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWQ256 { + if v_1.Op != OpAMD64VPBROADCASTB512 { break } x := v_1.Args[0] @@ -42443,60 +43276,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXWQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTBMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMULLW128 x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // result: (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMULLW128 { + if v_1.Op != OpAMD64VPBROADCASTD128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMULLWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) + // result: (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHLDQ128 { + if v_1.Op != OpAMD64VPBROADCASTD256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHLDQMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBQ256 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTD512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBQ256 { + if v_1.Op != OpAMD64VPBROADCASTD512 { break } x := v_1.Args[0] @@ -42504,38 +43333,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTDMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXSQ128 x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // result: (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSQ128 { + if v_1.Op != OpAMD64VPBROADCASTQ128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSQMasked128Merging) + v.reset(OpAMD64VPBROADCASTQMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - 
v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPOPCNTW128 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // result: (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW128 { + if v_1.Op != OpAMD64VPBROADCASTQ256 { break } x := v_1.Args[0] @@ -42543,18 +43371,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTW128 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTQ512 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // result: (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTW128 { + if v_1.Op != OpAMD64VPBROADCASTQ512 { break } x := v_1.Args[0] @@ -42562,38 +43390,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTQMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPRORVD128 x y) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTW128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VPBROADCASTWMasked128Merging dst x (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPRORVD128 { + if v_1.Op != OpAMD64VPBROADCASTW128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPRORVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VBROADCASTSD256 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTW256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VBROADCASTSDMasked256Merging dst x (VPMOVVec64x2ToM mask)) + // result: (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VBROADCASTSD256 { + if v_1.Op != OpAMD64VPBROADCASTW256 { break } x := v_1.Args[0] @@ -42601,18 +43428,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VBROADCASTSDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXDQ128 x) mask) + // match: (VPBLENDVB128 dst (VPBROADCASTW512 x) mask) // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // result: (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXDQ128 { + if v_1.Op != OpAMD64VPBROADCASTW512 { break } x := v_1.Args[0] @@ -42620,78 +43447,76 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXDQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPBROADCASTWMasked512Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) + // match: (VPBLENDVB128 dst (VPLZCNTD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) + // result: (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAQ128const { + if v_1.Op != OpAMD64VPLZCNTD128 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VPLZCNTDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPACKUSDW128 x y) mask) + // match: (VPBLENDVB128 dst (VPLZCNTQ128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKUSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPACKUSDW128 { + if v_1.Op != OpAMD64VPLZCNTQ128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPACKUSDWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPLZCNTQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPLZCNTD128 x) mask) + // match: (VPBLENDVB128 dst (VPMADDUBSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // result: (VPMADDUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD128 { + if v_1.Op != OpAMD64VPMADDUBSW128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPMADDUBSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXUD128 x y) mask) + // match: (VPBLENDVB128 dst (VPMADDWD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUD128 { + if v_1.Op != 
OpAMD64VPMADDWD128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -42700,56 +43525,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXUDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMADDWDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPOPCNTB128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMAXSB128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMAXSBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPOPCNTB128 {
+		if v_1.Op != OpAMD64VPMAXSB128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPOPCNTBMasked128Merging)
+		v.reset(OpAMD64VPMAXSBMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VBROADCASTSD512 x) mask)
+	// match: (VPBLENDVB128 dst (VPMAXSD128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VBROADCASTSDMasked512Merging dst x (VPMOVVec64x2ToM mask))
+	// result: (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VBROADCASTSD512 {
+		if v_1.Op != OpAMD64VPMAXSD128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VBROADCASTSDMasked512Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXSDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VMINPD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMAXSQ128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMINPDMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMAXSQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMINPD128 {
+		if v_1.Op != OpAMD64VPMAXSQ128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -42758,40 +43585,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMINPDMasked128Merging)
+		v.reset(OpAMD64VPMAXSQMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask)
+	// match: (VPBLENDVB128 dst (VPMAXSW128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask))
+	// result: (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHRDW128 {
+		if v_1.Op != OpAMD64VPMAXSW128 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHRDWMasked128Merging)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPMAXSWMasked128Merging)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VADDPD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMAXUB128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VADDPDMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VADDPD128 {
+		if v_1.Op != OpAMD64VPMAXUB128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -42800,56 +43625,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VADDPDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXUBMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVZXWD256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMAXUD128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM mask))
+	// result: (VPMAXUDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXWD256 {
+		if v_1.Op != OpAMD64VPMAXUD128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXWDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXUDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMAXUQ128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM mask))
+	// result: (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXWQ256 {
+		if v_1.Op != OpAMD64VPMAXUQ128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXWQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXUQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSUBSW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMAXUW128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBSW128 {
+		if v_1.Op != OpAMD64VPMAXUW128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -42858,79 +43685,78 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBSWMasked128Merging)
+		v.reset(OpAMD64VPMAXUWMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask)
+	// match: (VPBLENDVB128 dst (VPMINSB128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask))
+	// result: (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VREDUCEPD128 {
+		if v_1.Op != OpAMD64VPMINSB128 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VREDUCEPDMasked128Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMINSBMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVZXBD256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMINSD128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXBD256 {
+		if v_1.Op != OpAMD64VPMINSD128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXBDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMINSDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPRORQ128 [a] x) mask)
+	// match: (VPBLENDVB128 dst (VPMINSQ128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask))
+	// result: (VPMINSQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPRORQ128 {
+		if v_1.Op != OpAMD64VPMINSQ128 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPRORQMasked128Merging)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPMINSQMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSLLVW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMINSW128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSLLVW128 {
+		if v_1.Op != OpAMD64VPMINSW128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -42939,37 +43765,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSLLVWMasked128Merging)
+		v.reset(OpAMD64VPMINSWMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXBW256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMINUB128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXBW256 {
+		if v_1.Op != OpAMD64VPMINUB128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXBWMasked256Merging)
+		v.reset(OpAMD64VPMINUBMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMINSD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMINUD128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINSDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMINUDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINSD128 {
+		if v_1.Op != OpAMD64VPMINUD128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -42978,18 +43805,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINSDMasked128Merging)
+		v.reset(OpAMD64VPMINUDMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VADDPS128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMINUQ128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VADDPSMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VADDPS128 {
+		if v_1.Op != OpAMD64VPMINUQ128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -42998,37 +43825,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VADDPSMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMINUQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXBD256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMINUW128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXBD256 {
+		if v_1.Op != OpAMD64VPMINUW128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXBDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMINUWMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXDQ128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVDB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXDQ128 {
+		if v_1.Op != OpAMD64VPMOVDB128_128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43036,138 +43864,132 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXDQMasked128Merging)
+		v.reset(OpAMD64VPMOVDBMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPROLVD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVDW128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMOVDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPROLVD128 {
+		if v_1.Op != OpAMD64VPMOVDW128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPROLVDMasked128Merging)
+		v.reset(OpAMD64VPMOVDWMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSRLVQ128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVQB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMOVQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSRLVQ128 {
+		if v_1.Op != OpAMD64VPMOVQB128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSRLVQMasked128Merging)
+		v.reset(OpAMD64VPMOVQBMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMAXSD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVQD128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMAXSDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMOVQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMAXSD128 {
+		if v_1.Op != OpAMD64VPMOVQD128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXSDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVQDMasked128_128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMINUB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVQW128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINUBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPMOVQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINUB128 {
+		if v_1.Op != OpAMD64VPMOVQW128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINUBMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVQWMasked128_128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMULLQ128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSDB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMOVSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULLQ128 {
+		if v_1.Op != OpAMD64VPMOVSDB128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULLQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSDBMasked128_128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSUBD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSDW128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMOVSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBD128 {
+		if v_1.Op != OpAMD64VPMOVSDW128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBDMasked128Merging)
+		v.reset(OpAMD64VPMOVSDWMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPBROADCASTD512 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSQB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPBROADCASTDMasked512Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPBROADCASTD512 {
+		if v_1.Op != OpAMD64VPMOVSQB128_128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43175,120 +43997,113 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPBROADCASTDMasked512Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSQBMasked128_128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMADDWD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSQD128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMADDWDMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPMOVSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMADDWD128 {
+		if v_1.Op != OpAMD64VPMOVSQD128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMADDWDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSQDMasked128_128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPROLD128 [a] x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSQW128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPROLD128 {
+		if v_1.Op != OpAMD64VPMOVSQW128_128 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPROLDMasked128Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSQWMasked128_128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSRAD128const [a] x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSWB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSRAD128const {
+		if v_1.Op != OpAMD64VPMOVSWB128_128 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSRADMasked128constMerging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSWBMasked128_128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSUBUSB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXBD128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBUSB128 {
+		if v_1.Op != OpAMD64VPMOVSXBD128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBUSBMasked128Merging)
+		v.reset(OpAMD64VPMOVSXBDMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPADDUSB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXBD256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPMOVSXBDMasked256Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDUSB128 {
+		if v_1.Op != OpAMD64VPMOVSXBD256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDUSBMasked128Merging)
+		v.reset(OpAMD64VPMOVSXBDMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVZXBW128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXBD512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXBW128 {
+		if v_1.Op != OpAMD64VPMOVSXBD512 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43296,18 +44111,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXBWMasked128Merging)
+		v.reset(OpAMD64VPMOVSXBDMasked512Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVZXDQ256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXBQ128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXDQ256 {
+		if v_1.Op != OpAMD64VPMOVSXBQ128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43315,98 +44130,94 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXDQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXBQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPROLVQ128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPROLVQ128 {
+		if v_1.Op != OpAMD64VPMOVSXBQ256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPROLVQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXBQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPADDB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXBQ512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPMOVSXBQMasked512Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDB128 {
+		if v_1.Op != OpAMD64VPMOVSXBQ512 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDBMasked128Merging)
+		v.reset(OpAMD64VPMOVSXBQMasked512Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPADDQ128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXBW128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMOVSXBWMasked128Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDQ128 {
+		if v_1.Op != OpAMD64VPMOVSXBW128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXBWMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPADDUSW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXBW256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPMOVSXBWMasked256Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDUSW128 {
+		if v_1.Op != OpAMD64VPMOVSXBW256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDUSWMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXBWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPBROADCASTB128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXDQ128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPBROADCASTBMasked128Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMOVSXDQMasked128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPBROADCASTB128 {
+		if v_1.Op != OpAMD64VPMOVSXDQ128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43414,140 +44225,132 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPBROADCASTBMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXDQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXDQ256 x) mask)
	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VRNDSCALEPS128 {
+		if v_1.Op != OpAMD64VPMOVSXDQ256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VRNDSCALEPSMasked128Merging)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPMOVSXDQMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMINUW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXWD128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINUWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINUW128 {
+		if v_1.Op != OpAMD64VPMOVSXWD128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINUWMasked128Merging)
+		v.reset(OpAMD64VPMOVSXWDMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMINSW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXWD256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINSWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINSW128 {
+		if v_1.Op != OpAMD64VPMOVSXWD256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINSWMasked128Merging)
+		v.reset(OpAMD64VPMOVSXWDMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMULLD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULLD128 {
+		if v_1.Op != OpAMD64VPMOVSXWQ128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULLDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXWQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSHUFB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXWQ256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPMOVSXWQMasked256Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHUFB128 {
+		if v_1.Op != OpAMD64VPMOVSXWQ256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHUFBMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXWQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPRORD128 [a] x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPRORD128 {
+		if v_1.Op != OpAMD64VPMOVSXWQ512 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPRORDMasked128Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXWQMasked512Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VCVTTPS2DQ128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVUSDB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VCVTTPS2DQMasked128Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVUSDBMasked128_128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VCVTTPS2DQ128 {
+		if v_1.Op != OpAMD64VPMOVUSDB128_128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43555,58 +44358,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VCVTTPS2DQMasked128Merging)
+		v.reset(OpAMD64VPMOVUSDBMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VMINPS128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVUSDW128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMINPSMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMOVUSDWMasked128_128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMINPS128 {
+		if v_1.Op != OpAMD64VPMOVUSDW128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMINPSMasked128Merging)
+		v.reset(OpAMD64VPMOVUSDWMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VSUBPD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVUSQB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMOVUSQBMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VSUBPD128 {
+		if v_1.Op != OpAMD64VPMOVUSQB128_128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VSUBPDMasked128Merging)
+		v.reset(OpAMD64VPMOVUSQBMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPBROADCASTB512 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVUSQD128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPBROADCASTBMasked512Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMOVUSQDMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPBROADCASTB512 {
+		if v_1.Op != OpAMD64VPMOVUSQD128_128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43614,18 +44415,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPBROADCASTBMasked512Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVUSQDMasked128_128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VRCP14PD128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVUSQW128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM mask))
+	// result: (VPMOVUSQWMasked128_128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VRCP14PD128 {
+		if v_1.Op != OpAMD64VPMOVUSQW128_128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43633,18 +44434,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VRCP14PDMasked128Merging)
+		v.reset(OpAMD64VPMOVUSQWMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXWD256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVUSWB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXWDMasked256Merging dst x (VPMOVVec16x8ToM mask))
+	// result: (VPMOVUSWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXWD256 {
+		if v_1.Op != OpAMD64VPMOVUSWB128_128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43652,18 +44453,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXWDMasked256Merging)
+		v.reset(OpAMD64VPMOVUSWBMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPBROADCASTW256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVWB128_128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPBROADCASTWMasked256Merging dst x (VPMOVVec16x8ToM mask))
+	// result: (VPMOVWBMasked128_128Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPBROADCASTW256 {
+		if v_1.Op != OpAMD64VPMOVWB128_128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43671,18 +44472,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPBROADCASTWMasked256Merging)
+		v.reset(OpAMD64VPMOVWBMasked128_128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPBROADCASTD256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXBD128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPBROADCASTDMasked256Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPBROADCASTD256 {
+		if v_1.Op != OpAMD64VPMOVZXBD128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43690,38 +44491,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPBROADCASTDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXBDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPADDD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXBD256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMOVZXBDMasked256Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDD128 {
+		if v_1.Op != OpAMD64VPMOVZXBD256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXBDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VBROADCASTSS128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXBD512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VBROADCASTSSMasked128Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VBROADCASTSS128 {
+		if v_1.Op != OpAMD64VPMOVZXBD512 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43729,18 +44529,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VBROADCASTSSMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXBDMasked512Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXDQ256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXDQMasked256Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXDQ256 {
+		if v_1.Op != OpAMD64VPMOVZXBQ128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43748,18 +44548,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXDQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXBQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXBD512 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXBQ256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXBDMasked512Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMOVZXBQMasked256Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXBD512 {
+		if v_1.Op != OpAMD64VPMOVZXBQ256 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43767,102 +44567,94 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXBDMasked512Merging)
+		v.reset(OpAMD64VPMOVZXBQMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXBQ512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask))
+	// result: (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHLDW128 {
+		if v_1.Op != OpAMD64VPMOVZXBQ512 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHLDWMasked128Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXBQMasked512Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMAXUQ128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXBW128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMAXUQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMOVZXBWMasked128Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMAXUQ128 {
+		if v_1.Op != OpAMD64VPMOVZXBW128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXUQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXBWMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXBW256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask))
+	// result: (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHLDD128 {
+		if v_1.Op != OpAMD64VPMOVZXBW256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHLDDMasked128Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXBWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VSUBPS128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXDQ128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPMOVZXDQMasked128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VSUBPS128 {
+		if v_1.Op != OpAMD64VPMOVZXDQ128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VSUBPSMasked128Merging)
+		v.reset(OpAMD64VPMOVZXDQMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPBROADCASTQ128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXDQ256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPBROADCASTQMasked128Merging dst x (VPMOVVec64x2ToM mask))
+	// result: (VPMOVZXDQMasked256Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPBROADCASTQ128 {
+		if v_1.Op != OpAMD64VPMOVZXDQ256 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43870,38 +44662,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPBROADCASTQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXDQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPRORVQ128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXWD128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPRORVQ128 {
+		if v_1.Op != OpAMD64VPMOVZXWD128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPRORVQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXWDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VBROADCASTSS512 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXWD256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMOVZXWDMasked256Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VBROADCASTSS512 {
+		if v_1.Op != OpAMD64VPMOVZXWD256 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43909,18 +44700,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VBROADCASTSSMasked512Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXWDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVZXBD128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXBDMasked128Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXBD128 {
+		if v_1.Op != OpAMD64VPMOVZXWQ128 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43928,18 +44719,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXBDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXWQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVZXBQ128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXWQ256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXBQMasked128Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMOVZXWQMasked256Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXBQ128 {
+		if v_1.Op != OpAMD64VPMOVZXWQ256 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43947,18 +44738,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXBQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXWQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPABSW128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMOVZXWQ512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPABSWMasked128Merging dst x (VPMOVVec16x8ToM mask))
+	// result: (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPABSW128 {
+		if v_1.Op != OpAMD64VPMOVZXWQ512 {
 			break
 		}
 		x := v_1.Args[0]
@@ -43966,18 +44757,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPABSWMasked128Merging)
+		v.reset(OpAMD64VPMOVZXWQMasked512Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSUBW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMULHUW128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBW128 {
+		if v_1.Op != OpAMD64VPMULHUW128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -43986,37 +44777,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBWMasked128Merging)
+		v.reset(OpAMD64VPMULHUWMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVZXWQ128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMULHW128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXWQMasked128Merging dst x (VPMOVVec16x8ToM mask))
+	// result: (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXWQ128 {
+		if v_1.Op != OpAMD64VPMULHW128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXWQMasked128Merging)
+		v.reset(OpAMD64VPMULHWMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VGF2P8MULB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPMULLD128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VGF2P8MULBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPMULLDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VGF2P8MULB128 {
+		if v_1.Op != OpAMD64VPMULLD128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -44025,195 +44817,196 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VGF2P8MULBMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMULLDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPABSD128 x) mask)
+	// match: (VPBLENDVB128 dst (VPMULLQ128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPABSDMasked128Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPMULLQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPABSD128 {
+		if v_1.Op != OpAMD64VPMULLQ128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPABSDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMULLQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPBROADCASTB256 x) mask)
+	// match: (VPBLENDVB128 dst (VPMULLW128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPBROADCASTBMasked256Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPMULLWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPBROADCASTB256 {
+		if v_1.Op != OpAMD64VPMULLW128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPBROADCASTBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMULLWMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VMAXPD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPOPCNTB128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMAXPDMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPOPCNTBMasked128Merging dst x (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMAXPD128 {
+		if v_1.Op != OpAMD64VPOPCNTB128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMAXPDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPOPCNTBMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMINUQ128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPOPCNTD128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINUQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINUQ128 {
+		if v_1.Op != OpAMD64VPOPCNTD128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINUQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPOPCNTDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VMULPS128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPOPCNTQ128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMULPSMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMULPS128 {
+		if v_1.Op != OpAMD64VPOPCNTQ128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMULPSMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPOPCNTQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMULHUW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPOPCNTW128 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULHUWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPOPCNTWMasked128Merging dst x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULHUW128 {
+		if v_1.Op != OpAMD64VPOPCNTW128 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULHUWMasked128Merging)
+		v.reset(OpAMD64VPOPCNTWMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VMULPD128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPROLD128 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMULPDMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPROLDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMULPD128 {
+		if v_1.Op != OpAMD64VPROLD128 {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMULPDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPROLDMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSUBB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPROLQ128 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPROLQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBB128 {
+		if v_1.Op != OpAMD64VPROLQ128 {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBBMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPROLQMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VCVTPS2UDQ128 x) mask)
+	// match: (VPBLENDVB128 dst (VPROLVD128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VCVTPS2UDQMasked128Merging dst x (VPMOVVec32x4ToM mask))
+	// result: (VPROLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VCVTPS2UDQ128 {
+		if v_1.Op != OpAMD64VPROLVD128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VCVTPS2UDQMasked128Merging)
+		v.reset(OpAMD64VPROLVDMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VSCALEFPS128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPROLVQ128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM mask))
+	// result: (VPROLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VSCALEFPS128 {
+		if v_1.Op != OpAMD64VPROLVQ128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -44222,57 +45015,60 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VSCALEFPSMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPROLVQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSLLVQ128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPRORD128 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
+	// result: (VPRORDMasked128Merging dst [a] x (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSLLVQ128 {
+		if v_1.Op != OpAMD64VPRORD128 {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSLLVQMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPRORDMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXBQ256 x) mask)
+	// match: (VPBLENDVB128 dst (VPRORQ128 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXBQMasked256Merging dst x (VPMOVVec8x16ToM mask))
+	// result: (VPRORQMasked128Merging dst [a] x (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXBQ256 {
+		if v_1.Op != OpAMD64VPRORQ128 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXBQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPRORQMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPADDW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPRORVD128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPRORVDMasked128Merging dst x y (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDW128 {
+		if v_1.Op != OpAMD64VPRORVD128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -44281,86 +45077,93 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDWMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPRORVDMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMOVSXWD128 x) mask)
+	// match: (VPBLENDVB128 dst (VPRORVQ128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXWDMasked128Merging dst x (VPMOVVec16x8ToM mask))
+	// result: (VPRORVQMasked128Merging dst x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXWD128 {
+		if v_1.Op != OpAMD64VPRORVQ128 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXWDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPRORVQMasked128Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VRSQRT14PD128 x) mask)
+	// match: (VPBLENDVB128 dst (VPSHLDD128 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM mask))
+	// result: (VPSHLDDMasked128Merging dst [a] x y (VPMOVVec32x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VRSQRT14PD128 {
+		if v_1.Op != OpAMD64VPSHLDD128 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VRSQRT14PDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHLDDMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask)
+	// match: (VPBLENDVB128 dst (VPSHLDQ128 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask))
+	// result: (VPSHLDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSRAW128const {
+		if v_1.Op != OpAMD64VPSHLDQ128 {
 			break
 		}
 		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSRAWMasked128constMerging)
+		v.reset(OpAMD64VPSHLDQMasked128Merging)
 		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMULHW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPSHLDW128 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULHWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPSHLDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULHW128 {
+		if v_1.Op != OpAMD64VPSHLDW128 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULHWMasked128Merging)
+		v.reset(OpAMD64VPSHLDWMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
@@ -44388,52 +45191,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPSUBSB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBSB128 {
+		if v_1.Op != OpAMD64VPSHRDQ128 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBSBMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHRDQMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPADDSW128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPSHRDW128 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDSWMasked128Merging dst x y (VPMOVVec16x8ToM mask))
+	// result: (VPSHRDWMasked128Merging dst [a] x y (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDSW128 {
+		if v_1.Op != OpAMD64VPSHRDW128 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDSWMasked128Merging)
+		v.reset(OpAMD64VPSHRDWMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPMINSB128 x y) mask)
+	// match: (VPBLENDVB128 dst (VPSHUFB128 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINSBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
+	// result: (VPSHUFBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINSB128 {
+		if v_1.Op != OpAMD64VPSHUFB128 {
 			break
 		}
 		y := v_1.Args[1]
@@ -44442,7 +45249,7 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINSBMasked128Merging)
+		v.reset(OpAMD64VPSHUFBMasked128Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
@@ -44469,31 +45276,33 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VPBROADCASTQ512 x) mask)
+	// match: (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPBROADCASTQMasked512Merging dst x (VPMOVVec64x2ToM mask))
+	// result: (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPBROADCASTQ512 {
+		if v_1.Op != OpAMD64VPSHUFHW128 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPBROADCASTQMasked512Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHUFHWMasked128Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask)
+	// match: (VPBLENDVB128 dst (VPSLLD128const [a] x) mask)
 	// cond:
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) + // result: (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VREDUCEPS128 { + if v_1.Op != OpAMD64VPSLLD128const { break } a := auxIntToUint8(v_1.AuxInt) @@ -44502,38 +45311,40 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VREDUCEPSMasked128Merging) + v.reset(OpAMD64VPSLLDMasked128constMerging) v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWQ512 x) mask) + // match: (VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) + // result: (VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWQ512 { + if v_1.Op != OpAMD64VPSLLQ128const { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXWQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPSLLQMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAVW128 x y) mask) + // match: (VPBLENDVB128 dst (VPSLLVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // result: (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVW128 { + if v_1.Op != OpAMD64VPSLLVD128 { break } y := v_1.Args[1] @@ -44542,37 +45353,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPSLLVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VSQRTPD128 x) mask) + // match: (VPBLENDVB128 dst (VPSLLVQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // result: (VPSLLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPD128 { + if v_1.Op != OpAMD64VPSLLVQ128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSQRTPDMasked128Merging) + v.reset(OpAMD64VPSLLVQMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPAVGW128 x y) mask) + // match: (VPBLENDVB128 dst (VPSLLVW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // result: (VPSLLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGW128 { + if v_1.Op != OpAMD64VPSLLVW128 { break } y := v_1.Args[1] @@ -44581,96 +45393,141 @@ func 
rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPAVGWMasked128Merging) + v.reset(OpAMD64VPSLLVWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VDIVPS128 x y) mask) + // match: (VPBLENDVB128 dst (VPSLLW128const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPS128 { + if v_1.Op != OpAMD64VPSLLW128const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VDIVPSMasked128Merging) + v.reset(OpAMD64VPSLLWMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(dst, x, v0) + return true + } + // match: (VPBLENDVB128 dst (VPSRAD128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRADMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRAD128const { + break + } + a := auxIntToUint8(v_1.AuxInt) + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRADMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VDIVPD128 x y) mask) + // match: (VPBLENDVB128 dst (VPSRAQ128const [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VDIVPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // result: (VPSRAQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VDIVPD128 { + if v_1.Op != OpAMD64VPSRAQ128const { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VDIVPDMasked128Merging) + v.reset(OpAMD64VPSRAQMasked128constMerging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPOPCNTD128 x) mask) + // match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTD128 { + if v_1.Op != OpAMD64VPSRAVD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTDMasked128Merging) + v.reset(OpAMD64VPSRAVDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTQ256 x) mask) + // match: (VPBLENDVB128 dst (VPSRAVQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTQMasked256Merging dst x (VPMOVVec64x2ToM mask)) + // result: (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) 
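// Editor's note: every VPBLENDVB rewrite in this family encodes one identity:
// blending the result of an unmasked op into dst under a mask is equivalent to
// the AVX-512 merging-masked form of that op. A minimal scalar model of the
// equivalence, per lane (an illustration with assumed helper names, not code
// from this file):
//
//	// mergeMasked computes f(x, y) in lanes where m is set and keeps dst
//	// in lanes where it is clear -- the behavior of the *Masked*Merging
//	// ops produced by these rules.
//	func mergeMasked(f func(a, b uint64) uint64, dst, x, y []uint64, m []bool) []uint64 {
//		out := make([]uint64, len(dst))
//		for i := range dst {
//			if m[i] {
//				out[i] = f(x[i], y[i])
//			} else {
//				out[i] = dst[i]
//			}
//		}
//		return out
//	}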
for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTQ256 { + if v_1.Op != OpAMD64VPSRAVQ128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTQMasked256Merging) + v.reset(OpAMD64VPSRAVQMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPSRAVW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) + // result: (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VRNDSCALEPD128 { + if v_1.Op != OpAMD64VPSRAVW128 { + break + } + y := v_1.Args[1] + x := v_1.Args[0] + mask := v_2 + if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { + break + } + v.reset(OpAMD64VPSRAVWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg4(dst, x, y, v0) + return true + } + // match: (VPBLENDVB128 dst (VPSRAW128const [a] x) mask) + // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) + // result: (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) + for { + dst := v_0 + if v_1.Op != OpAMD64VPSRAW128const { break } a := auxIntToUint8(v_1.AuxInt) @@ -44679,57 +45536,59 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VRNDSCALEPDMasked128Merging) + v.reset(OpAMD64VPSRAWMasked128constMerging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXWQ128 x) mask) + // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWQMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWQ128 { + if v_1.Op != OpAMD64VPSRLVD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXWQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPSRLVDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPOPCNTQ128 x) mask) + // match: (VPBLENDVB128 dst (VPSRLVQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // result: (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ128 { + if v_1.Op != OpAMD64VPSRLVQ128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQMasked128Merging) + v.reset(OpAMD64VPSRLVQMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPAVGB128 x y) mask) + // match: (VPBLENDVB128 dst (VPSRLVW128 x y) mask) // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // result: (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGB128 { + if v_1.Op != OpAMD64VPSRLVW128 { break } y := v_1.Args[1] @@ -44738,37 +45597,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPAVGBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPSRLVWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBQ128 x) mask) + // match: (VPBLENDVB128 dst (VPSUBB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBQMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBQ128 { + if v_1.Op != OpAMD64VPSUBB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBQMasked128Merging) + v.reset(OpAMD64VPSUBBMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXSW128 x y) mask) + // match: (VPBLENDVB128 dst (VPSUBD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // result: (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXSW128 { + if v_1.Op != OpAMD64VPSUBD128 { break } y := v_1.Args[1] @@ -44777,98 +45637,98 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXSWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBW256 x) mask) + // match: (VPBLENDVB128 dst (VPSUBQ128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBWMasked256Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBW256 { + if v_1.Op != OpAMD64VPSUBQ128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VPSUBQMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBD512 x) mask) + // match: (VPBLENDVB128 dst (VPSUBSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBDMasked512Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VPSUBSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBD512 { + if v_1.Op != OpAMD64VPSUBSB128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } 
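// Editor's note: the rebuild calls track the matched op's arity, with the
// converted mask always appended as the final argument:
//
//	v.AddArg3(dst, x, v0)    // unary source ops: VPABS*, VPOPCNT*, VSQRT*, VPMOV* ...
//	v.AddArg4(dst, x, y, v0) // binary source ops: VPSUB*, VPMAX*, VSCALEF* ...
//
// where v0 is the VPMOVVec*ToM conversion of the vector mask into a k-register
// mask whose lane shape (8x16, 16x8, 32x4, 64x2 for 128-bit vectors) matches
// the element type of the op being masked.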
- v.reset(OpAMD64VPMOVZXBDMasked512Merging) + v.reset(OpAMD64VPSUBSBMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSHUFHW128 [a] x) mask) + // match: (VPBLENDVB128 dst (VPSUBSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFHWMasked128Merging dst [a] x (VPMOVVec16x8ToM mask)) + // result: (VPSUBSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFHW128 { + if v_1.Op != OpAMD64VPSUBSW128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFHWMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPSUBSWMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLW128const [a] x) mask) + // match: (VPBLENDVB128 dst (VPSUBUSB128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM mask)) + // result: (VPSUBUSBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLW128const { + if v_1.Op != OpAMD64VPSUBUSB128 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLWMasked128constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBUSBMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLVD128 x y) mask) + // match: (VPBLENDVB128 dst (VPSUBUSW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VPSUBUSWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLVD128 { + if v_1.Op != OpAMD64VPSUBUSW128 { break } y := v_1.Args[1] @@ -44877,18 +45737,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBUSWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask) + // match: (VPBLENDVB128 dst (VPSUBW128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VPSUBWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVD128 { + if v_1.Op != OpAMD64VPSUBW128 { break } y := v_1.Args[1] @@ -44897,18 +45757,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBWMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) 
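// Editor's note: the match/cond/result comments are emitted verbatim from the
// rule source that rulegen consumes; this function is generated, not written
// by hand. Reconstructed from those comments alone, a rule in the SSA rules
// DSL plausibly reads (a sketch -- the actual rules file text is not shown in
// this hunk):
//
//	(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512)
//		=> (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM mask))
//
// rulegen expands each such line into the matcher shape repeated here: check
// v_1.Op, bind the operands, test the condition, then reset v to the result op.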
return true } - // match: (VPBLENDVB128 dst (VPMOVSXWQ512 x) mask) + // match: (VPBLENDVB128 dst (VRCP14PD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXWQMasked512Merging dst x (VPMOVVec16x8ToM mask)) + // result: (VRCP14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXWQ512 { + if v_1.Op != OpAMD64VRCP14PD128 { break } x := v_1.Args[0] @@ -44916,38 +45776,39 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXWQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VRCP14PDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSUBQ128 x y) mask) + // match: (VPBLENDVB128 dst (VREDUCEPD128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // result: (VREDUCEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBQ128 { + if v_1.Op != OpAMD64VREDUCEPD128 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBQMasked128Merging) + v.reset(OpAMD64VREDUCEPDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLD128const [a] x) mask) + // match: (VPBLENDVB128 dst (VREDUCEPS128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSLLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM mask)) + // result: (VREDUCEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLD128const { + if v_1.Op != OpAMD64VREDUCEPS128 { break } a := auxIntToUint8(v_1.AuxInt) @@ -44956,39 +45817,40 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLDMasked128constMerging) + v.reset(OpAMD64VREDUCEPSMasked128Merging) v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSRLVW128 x y) mask) + // match: (VPBLENDVB128 dst (VRNDSCALEPD128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // result: (VRNDSCALEPDMasked128Merging dst [a] x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRLVW128 { + if v_1.Op != OpAMD64VRNDSCALEPD128 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRLVWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VRNDSCALEPDMasked128Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSLLQ128const [a] x) mask) + // match: (VPBLENDVB128 dst (VRNDSCALEPS128 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: 
(VPSLLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM mask)) + // result: (VRNDSCALEPSMasked128Merging dst [a] x (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSLLQ128const { + if v_1.Op != OpAMD64VRNDSCALEPS128 { break } a := auxIntToUint8(v_1.AuxInt) @@ -44997,77 +45859,78 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSLLQMasked128constMerging) + v.reset(OpAMD64VRNDSCALEPSMasked128Merging) v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAVD128 x y) mask) + // match: (VPBLENDVB128 dst (VRSQRT14PD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVDMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VRSQRT14PDMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVD128 { + if v_1.Op != OpAMD64VRSQRT14PD128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VRSQRT14PDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVSXBD128 x) mask) + // match: (VPBLENDVB128 dst (VSCALEFPD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBDMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBD128 { + if v_1.Op != OpAMD64VSCALEFPD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VSCALEFPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXBQ512 x) mask) + // match: (VPBLENDVB128 dst (VSCALEFPS128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXBQMasked512Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VSCALEFPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXBQ512 { + if v_1.Op != OpAMD64VSCALEFPS128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXBQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VSCALEFPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPLZCNTQ128 x) mask) + // match: (VPBLENDVB128 dst (VSQRTPD128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // result: (VSQRTPDMasked128Merging dst x (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTQ128 { + if v_1.Op != OpAMD64VSQRTPD128 { break } x := v_1.Args[0] @@ 
-45075,57 +45938,57 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTQMasked128Merging) + v.reset(OpAMD64VSQRTPDMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPACKSSDW128 x y) mask) + // match: (VPBLENDVB128 dst (VSQRTPS128 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKSSDWMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW128 { + if v_1.Op != OpAMD64VSQRTPS128 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPACKSSDWMasked128Merging) + v.reset(OpAMD64VSQRTPSMasked128Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMOVZXWD128 x) mask) + // match: (VPBLENDVB128 dst (VSUBPD128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVZXWDMasked128Merging dst x (VPMOVVec16x8ToM mask)) + // result: (VSUBPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVZXWD128 { + if v_1.Op != OpAMD64VSUBPD128 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVZXWDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VSUBPDMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSRAVQ128 x y) mask) + // match: (VPBLENDVB128 dst (VSUBPS128 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // result: (VSUBPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ128 { + if v_1.Op != OpAMD64VSUBPS128 { break } y := v_1.Args[1] @@ -45134,37 +45997,45 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VSUBPSMasked128Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTD128 x) mask) + return false +} +func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (VPBLENDVB256 dst (VADDPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTDMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // result: (VADDPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTD128 { + if v_1.Op != OpAMD64VADDPD256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VADDPDMasked256Merging) + v0 := b.NewValue0(v.Pos, 
OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VMAXPS128 x y) mask) + // match: (VPBLENDVB256 dst (VADDPS256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPSMasked128Merging dst x y (VPMOVVec32x4ToM mask)) + // result: (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPS128 { + if v_1.Op != OpAMD64VADDPS256 { break } y := v_1.Args[1] @@ -45173,98 +46044,96 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMAXPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VADDPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPSHRDQ128 [a] x y) mask) + // match: (VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHRDQMasked128Merging dst [a] x y (VPMOVVec64x2ToM mask)) + // result: (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHRDQ128 { + if v_1.Op != OpAMD64VCVTPS2UDQ256 { break } - a := auxIntToUint8(v_1.AuxInt) - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHRDQMasked128Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VCVTPS2UDQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXUW128 x y) mask) + // match: (VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUWMasked128Merging dst x y (VPMOVVec16x8ToM mask)) + // result: (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUW128 { + if v_1.Op != OpAMD64VCVTTPS2DQ256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUWMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VCVTTPS2DQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB128 dst (VPABSB128 x) mask) + // match: (VPBLENDVB256 dst (VDIVPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSBMasked128Merging dst x (VPMOVVec8x16ToM mask)) + // result: (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSB128 { + if v_1.Op != OpAMD64VDIVPD256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VDIVPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPABSQ128 x) mask) + // match: (VPBLENDVB256 dst (VDIVPS256 x y) mask) // cond: 
v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSQMasked128Merging dst x (VPMOVVec64x2ToM mask)) + // result: (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSQ128 { + if v_1.Op != OpAMD64VDIVPS256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSQMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VDIVPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VSCALEFPD128 x y) mask) + // match: (VPBLENDVB256 dst (VGF2P8MULB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSCALEFPDMasked128Merging dst x y (VPMOVVec64x2ToM mask)) + // result: (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSCALEFPD128 { + if v_1.Op != OpAMD64VGF2P8MULB256 { break } y := v_1.Args[1] @@ -45273,56 +46142,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSCALEFPDMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v.reset(OpAMD64VGF2P8MULBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VSQRTPS128 x) mask) + // match: (VPBLENDVB256 dst (VMAXPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSQRTPSMasked128Merging dst x (VPMOVVec32x4ToM mask)) + // result: (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSQRTPS128 { + if v_1.Op != OpAMD64VMAXPD256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSQRTPSMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v.reset(OpAMD64VMAXPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPBROADCASTW512 x) mask) + // match: (VPBLENDVB256 dst (VMAXPS256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPBROADCASTWMasked512Merging dst x (VPMOVVec16x8ToM mask)) + // result: (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPBROADCASTW512 { + if v_1.Op != OpAMD64VMAXPS256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPBROADCASTWMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v.reset(OpAMD64VMAXPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB128 dst (VPMAXUB128 x y) mask) + // match: (VPBLENDVB256 dst (VMINPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUBMasked128Merging dst x y (VPMOVVec8x16ToM mask)) + // result: (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUB128 { + if v_1.Op != OpAMD64VMINPD256 { break } y := v_1.Args[1] @@ -45331,44 +46202,38 @@ func 
rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask) + v.reset(OpAMD64VMINPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - return false -} -func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (VPBLENDVB256 dst (VPMOVSXBW512 x) mask) + // match: (VPBLENDVB256 dst (VMINPS256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM mask)) + // result: (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSXBW512 { + if v_1.Op != OpAMD64VMINPS256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXBWMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VMINPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPADDUSB256 x y) mask) + // match: (VPBLENDVB256 dst (VMULPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + // result: (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPADDUSB256 { + if v_1.Op != OpAMD64VMULPD256 { break } y := v_1.Args[1] @@ -45377,8 +46242,8 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPADDUSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VMULPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true @@ -45403,12 +46268,12 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPOPCNTB256 x) mask) + // match: (VPBLENDVB256 dst (VPABSB256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM mask)) + // result: (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTB256 { + if v_1.Op != OpAMD64VPABSB256 { break } x := v_1.Args[0] @@ -45416,79 +46281,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTBMasked256Merging) + v.reset(OpAMD64VPABSBMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VSUBPS256 x y) mask) + // match: (VPBLENDVB256 dst (VPABSD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // result: (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VSUBPS256 { + if v_1.Op != OpAMD64VPABSD256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VSUBPSMasked256Merging) 
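// Editor's note: rules earlier in this function whose pattern carries an
// immediate, written [a] in the comments (rotates, constant shifts,
// VREDUCE*/VRNDSCALE*), copy it through the value's AuxInt field. A sketch of
// the round-trip helpers, assuming the usual ssa rewrite conventions (the
// bodies below are assumed, not quoted from this CL):
//
//	func auxIntToUint8(i int64) uint8 { return uint8(i) }
//	func uint8ToAuxInt(u uint8) int64 { return int64(u) }
//
// The pairing a := auxIntToUint8(v_1.AuxInt) followed by
// v.AuxInt = uint8ToAuxInt(a) therefore moves the immediate from the matched
// op to its masked replacement unchanged.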
+ v.reset(OpAMD64VPABSDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXUQ256 x y) mask) + // match: (VPBLENDVB256 dst (VPABSQ256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // result: (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUQ256 { + if v_1.Op != OpAMD64VPABSQ256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUQMasked256Merging) + v.reset(OpAMD64VPABSQMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPROLD256 [a] x) mask) + // match: (VPBLENDVB256 dst (VPABSW256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) + // result: (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPROLD256 { + if v_1.Op != OpAMD64VPABSW256 { break } - a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPROLDMasked256Merging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPABSWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask) + // match: (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // result: (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVD256 { + if v_1.Op != OpAMD64VPACKSSDW256 { break } y := v_1.Args[1] @@ -45497,18 +46358,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVDMasked256Merging) + v.reset(OpAMD64VPACKSSDWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VADDPS256 x y) mask) + // match: (VPBLENDVB256 dst (VPACKUSDW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // result: (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VADDPS256 { + if v_1.Op != OpAMD64VPACKUSDW256 { break } y := v_1.Args[1] @@ -45517,96 +46378,98 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VADDPSMasked256Merging) + v.reset(OpAMD64VPACKUSDWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask) + // match: (VPBLENDVB256 dst (VPADDB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM mask)) + // result: (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { dst := v_0 - 
if v_1.Op != OpAMD64VPMOVSXDQ512 { + if v_1.Op != OpAMD64VPADDB256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSXDQMasked512Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPADDBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVUSWB128 x) mask) + // match: (VPBLENDVB256 dst (VPADDD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVUSWBMasked128Merging dst x (VPMOVVec16x16ToM mask)) + // result: (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVUSWB128 { + if v_1.Op != OpAMD64VPADDD256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVUSWBMasked128Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPADDDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPADDQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask)) + // result: (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAQ256const { + if v_1.Op != OpAMD64VPADDQ256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAQMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) + v.reset(OpAMD64VPADDQMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VCVTPS2UDQ256 x) mask) + // match: (VPBLENDVB256 dst (VPADDSB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VCVTPS2UDQMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // result: (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VCVTPS2UDQ256 { + if v_1.Op != OpAMD64VPADDSB256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VCVTPS2UDQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPADDSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMINSW256 x y) mask) + // match: (VPBLENDVB256 dst (VPADDSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // result: (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMINSW256 { + if v_1.Op != OpAMD64VPADDSW256 { break } y := v_1.Args[1] @@ -45615,40 +46478,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMINSWMasked256Merging) + v.reset(OpAMD64VPADDSWMasked256Merging) v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask)
+	// match: (VPBLENDVB256 dst (VPADDUSB256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask))
+	// result: (VPADDUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHLDD256 {
+		if v_1.Op != OpAMD64VPADDUSB256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHLDDMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPADDUSBMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSLLVW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPADDUSW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSLLVW256 {
+		if v_1.Op != OpAMD64VPADDUSW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45657,18 +46518,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSLLVWMasked256Merging)
+		v.reset(OpAMD64VPADDUSWMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSRLVQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPADDW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSRLVQ256 {
+		if v_1.Op != OpAMD64VPADDW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45677,18 +46538,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSRLVQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPADDWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSUBUSB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPAVGB256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBUSB256 {
+		if v_1.Op != OpAMD64VPAVGB256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45697,18 +46558,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBUSBMasked256Merging)
+		v.reset(OpAMD64VPAVGBMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMAXSW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPAVGW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMAXSW256 {
+		if v_1.Op != OpAMD64VPAVGW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45717,58 +46578,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXSWMasked256Merging)
+		v.reset(OpAMD64VPAVGWMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VMINPS256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPLZCNTD256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMINPSMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMINPS256 {
+		if v_1.Op != OpAMD64VPLZCNTD256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMINPSMasked256Merging)
+		v.reset(OpAMD64VPLZCNTDMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMINSD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPLZCNTQ256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINSD256 {
+		if v_1.Op != OpAMD64VPLZCNTQ256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINSDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPLZCNTQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPADDSW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMADDUBSW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDSW256 {
+		if v_1.Op != OpAMD64VPMADDUBSW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45777,39 +46636,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDSWMasked256Merging)
+		v.reset(OpAMD64VPMADDUBSWMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask)
-	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
-	for {
-		dst := v_0
-		if v_1.Op != OpAMD64VRNDSCALEPS256 {
-			break
-		}
-		a := auxIntToUint8(v_1.AuxInt)
-		x := v_1.Args[0]
-		mask := v_2
-		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
-			break
-		}
-		v.reset(OpAMD64VRNDSCALEPSMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
-		return true
-	}
-	// match: (VPBLENDVB256 dst (VPROLVQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMADDWD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPROLVQ256 {
+		if v_1.Op != OpAMD64VPMADDWD256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45818,18 +46656,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPROLVQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMADDWDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMULHW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMAXSB256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULHW256 {
+		if v_1.Op != OpAMD64VPMAXSB256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45838,18 +46676,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULHWMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXSBMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VDIVPD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMAXSD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VDIVPDMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VDIVPD256 {
+		if v_1.Op != OpAMD64VPMAXSD256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45858,37 +46696,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VDIVPDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXSDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPLZCNTQ256 x) mask)
+	// match: (VPBLENDVB256 dst (VPMAXSQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPLZCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask))
+	// result: (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPLZCNTQ256 {
+		if v_1.Op != OpAMD64VPMAXSQ256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPLZCNTQMasked256Merging)
+		v.reset(OpAMD64VPMAXSQMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMAXSW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMAXSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSRLVD256 {
+		if v_1.Op != OpAMD64VPMAXSW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45897,18 +46736,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSRLVDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXSWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPADDD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMAXUB256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDD256 {
+		if v_1.Op != OpAMD64VPMAXUB256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45917,56 +46756,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXUBMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVSDW128 x) mask)
+	// match: (VPBLENDVB256 dst (VPMAXUD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSDWMasked128Merging dst x (VPMOVVec32x8ToM mask))
+	// result: (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSDW128 {
+		if v_1.Op != OpAMD64VPMAXUD256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSDWMasked128Merging)
+		v.reset(OpAMD64VPMAXUDMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPOPCNTD256 x) mask)
+	// match: (VPBLENDVB256 dst (VPMAXUQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask))
+	// result: (VPMAXUQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPOPCNTD256 {
+		if v_1.Op != OpAMD64VPMAXUQ256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPOPCNTDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMAXUQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPADDUSW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMAXUW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDUSW256 {
+		if v_1.Op != OpAMD64VPMAXUW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -45975,58 +46816,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDUSWMasked256Merging)
+		v.reset(OpAMD64VPMAXUWMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VSQRTPD256 x) mask)
+	// match: (VPBLENDVB256 dst (VPMINSB256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM mask))
+	// result: (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VSQRTPD256 {
+		if v_1.Op != OpAMD64VPMINSB256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VSQRTPDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMINSBMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPMINSD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
+	// result: (VPMINSDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VREDUCEPS256 {
+		if v_1.Op != OpAMD64VPMINSD256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VREDUCEPSMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPMINSDMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSUBQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMINSQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBQ256 {
+		if v_1.Op != OpAMD64VPMINSQ256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46035,37 +46876,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBQMasked256Merging)
+		v.reset(OpAMD64VPMINSQMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVSXWD512 x) mask)
+	// match: (VPBLENDVB256 dst (VPMINSW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM mask))
+	// result: (VPMINSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSXWD512 {
+		if v_1.Op != OpAMD64VPMINSW256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSXWDMasked512Merging)
+		v.reset(OpAMD64VPMINSWMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VGF2P8MULB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMINUB256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VGF2P8MULBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VGF2P8MULB256 {
+		if v_1.Op != OpAMD64VPMINUB256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46074,18 +46916,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VGF2P8MULBMasked256Merging)
+		v.reset(OpAMD64VPMINUBMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSLLVD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMINUD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSLLVD256 {
+		if v_1.Op != OpAMD64VPMINUD256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46094,18 +46936,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSLLVDMasked256Merging)
+		v.reset(OpAMD64VPMINUDMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSRLVW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMINUQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSRLVW256 {
+		if v_1.Op != OpAMD64VPMINUQ256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46114,18 +46956,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSRLVWMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMINUQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPADDW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMINUW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDW256 {
+		if v_1.Op != OpAMD64VPMINUW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46134,102 +46976,94 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDWMasked256Merging)
+		v.reset(OpAMD64VPMINUWMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVDB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
+	// result: (VPMOVDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VREDUCEPD256 {
+		if v_1.Op != OpAMD64VPMOVDB128_256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VREDUCEPDMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVDBMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVDW128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
+	// result: (VPMOVDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VRNDSCALEPD256 {
+		if v_1.Op != OpAMD64VPMOVDW128_256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VRNDSCALEPDMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVDWMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPRORVD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVQB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMOVQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPRORVD256 {
+		if v_1.Op != OpAMD64VPMOVQB128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPRORVDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVQBMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVQD128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask))
+	// result: (VPMOVQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHLDW256 {
+		if v_1.Op != OpAMD64VPMOVQD128_256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHLDWMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVQDMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VCVTTPS2DQ256 x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVQW128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VCVTTPS2DQMasked256Merging dst x (VPMOVVec32x8ToM mask))
+	// result: (VPMOVQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VCVTTPS2DQ256 {
+		if v_1.Op != OpAMD64VPMOVQW128_256 {
 			break
 		}
 		x := v_1.Args[0]
@@ -46237,58 +47071,56 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VCVTTPS2DQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVQWMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VSUBPD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSDB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPMOVSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VSUBPD256 {
+		if v_1.Op != OpAMD64VPMOVSDB128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VSUBPDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSDBMasked128_256Merging)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSUBD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSDW128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMOVSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBD256 {
+		if v_1.Op != OpAMD64VPMOVSDW128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBDMasked256Merging)
+		v.reset(OpAMD64VPMOVSDWMasked128_256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VSQRTPS256 x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSQB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM mask))
+	// result: (VPMOVSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VSQRTPS256 {
+		if v_1.Op != OpAMD64VPMOVSQB128_256 {
 			break
 		}
 		x := v_1.Args[0]
@@ -46296,78 +47128,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VSQRTPSMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSQBMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPACKUSDW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSQD128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPACKUSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMOVSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPACKUSDW256 {
+		if v_1.Op != OpAMD64VPMOVSQD128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPACKUSDWMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSQDMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMULLD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSQW128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMOVSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULLD256 {
+		if v_1.Op != OpAMD64VPMOVSQW128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULLDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSQWMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPADDB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSWB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPMOVSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDB256 {
+		if v_1.Op != OpAMD64VPMOVSWB128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSWBMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVWB128 x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSXBW512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVWBMasked128Merging dst x (VPMOVVec16x16ToM mask))
+	// result: (VPMOVSXBWMasked512Merging dst x (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVWB128 {
+		if v_1.Op != OpAMD64VPMOVSXBW512 {
 			break
 		}
 		x := v_1.Args[0]
@@ -46375,38 +47204,37 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVWBMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXBWMasked512Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMADDWD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSXDQ512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMADDWDMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMOVSXDQMasked512Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMADDWD256 {
+		if v_1.Op != OpAMD64VPMOVSXDQ512 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMADDWDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXDQMasked512Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVQD128 x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVSXWD512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVQDMasked128Merging dst x (VPMOVVec64x4ToM mask))
+	// result: (VPMOVSXWDMasked512Merging dst x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVQD128 {
+		if v_1.Op != OpAMD64VPMOVSXWD512 {
 			break
 		}
 		x := v_1.Args[0]
@@ -46414,78 +47242,75 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVQDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVSXWDMasked512Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMULHUW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVUSDB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMOVUSDBMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULHUW256 {
+		if v_1.Op != OpAMD64VPMOVUSDB128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULHUWMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVUSDBMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMULLQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVUSDW128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPMOVUSDWMasked128_256Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULLQ256 {
+		if v_1.Op != OpAMD64VPMOVUSDW128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULLQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVUSDWMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPROLVD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVUSQB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMOVUSQBMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPROLVD256 {
+		if v_1.Op != OpAMD64VPMOVUSQB128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPROLVDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVUSQBMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVUSDW128 x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVUSQD128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVUSDWMasked128Merging dst x (VPMOVVec32x8ToM mask))
+	// result: (VPMOVUSQDMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVUSDW128 {
+		if v_1.Op != OpAMD64VPMOVUSQD128_256 {
 			break
 		}
 		x := v_1.Args[0]
@@ -46493,159 +47318,152 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVUSDWMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVUSQDMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMULLW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVUSQW128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMOVUSQWMasked128_256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMULLW256 {
+		if v_1.Op != OpAMD64VPMOVUSQW128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMULLWMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVUSQWMasked128_256Merging)
		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPRORD256 [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVUSWB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
+	// result: (VPMOVUSWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPRORD256 {
+		if v_1.Op != OpAMD64VPMOVUSWB128_256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPRORDMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVUSWBMasked128_256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSRAVW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVWB128_256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMOVWBMasked128_256Merging dst x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSRAVW256 {
+		if v_1.Op != OpAMD64VPMOVWB128_256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSRAVWMasked256Merging)
+		v.reset(OpAMD64VPMOVWBMasked128_256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMINUD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVZXBW512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINUDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINUD256 {
+		if v_1.Op != OpAMD64VPMOVZXBW512 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINUDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXBWMasked512Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSHUFD256 [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
+	// result: (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHUFD256 {
+		if v_1.Op != OpAMD64VPMOVZXDQ512 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHUFDMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPMOVZXDQMasked512Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSLLVQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMOVZXWD512 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSLLVQ256 {
+		if v_1.Op != OpAMD64VPMOVZXWD512 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSLLVQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMOVZXWDMasked512Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVUSQD128 x) mask)
+	// match: (VPBLENDVB256 dst (VPMULHUW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVUSQDMasked128Merging dst x (VPMOVVec64x4ToM mask))
+	// result: (VPMULHUWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVUSQD128 {
+		if v_1.Op != OpAMD64VPMULHUW256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVUSQDMasked128Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMULHUWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSUBUSW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMULHW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPMULHWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBUSW256 {
+		if v_1.Op != OpAMD64VPMULHW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46654,37 +47472,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBUSWMasked256Merging)
+		v.reset(OpAMD64VPMULHWMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VRSQRT14PD256 x) mask)
+	// match: (VPBLENDVB256 dst (VPMULLD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM mask))
+	// result: (VPMULLDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VRSQRT14PD256 {
+		if v_1.Op != OpAMD64VPMULLD256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VRSQRT14PDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPMULLDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPADDSB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPMULLQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPMULLQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDSB256 {
+		if v_1.Op != OpAMD64VPMULLQ256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46693,98 +47512,95 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDSBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPMULLQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVZXWD512 x) mask)
+	// match: (VPBLENDVB256 dst (VPMULLW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXWDMasked512Merging dst x (VPMOVVec16x16ToM mask))
+	// result: (VPMULLWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXWD512 {
+		if v_1.Op != OpAMD64VPMULLW256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXWDMasked512Merging)
+		v.reset(OpAMD64VPMULLWMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPROLQ256 [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPOPCNTB256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
+	// result: (VPOPCNTBMasked256Merging dst x (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPROLQ256 {
+		if v_1.Op != OpAMD64VPOPCNTB256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPROLQMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPOPCNTBMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPAVGB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPOPCNTD256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPAVGBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPOPCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPAVGB256 {
+		if v_1.Op != OpAMD64VPOPCNTD256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPAVGBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPOPCNTDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPRORVQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPOPCNTQ256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPRORVQ256 {
+		if v_1.Op != OpAMD64VPOPCNTQ256 {
 			break
 		}
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPRORVQMasked256Merging)
+		v.reset(OpAMD64VPOPCNTQMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVZXDQ512 x) mask)
+	// match: (VPBLENDVB256 dst (VPOPCNTW256 x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXDQMasked512Merging dst x (VPMOVVec32x8ToM mask))
+	// result: (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXDQ512 {
+		if v_1.Op != OpAMD64VPOPCNTW256 {
 			break
 		}
 		x := v_1.Args[0]
@@ -46792,38 +47608,39 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXDQMasked512Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPOPCNTWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMINUB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPROLD256 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINUBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPROLDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINUB256 {
+		if v_1.Op != OpAMD64VPROLD256 {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINUBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPROLDMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSLLW256const [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPROLQ256 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask))
+	// result: (VPROLQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSLLW256const {
+		if v_1.Op != OpAMD64VPROLQ256 {
 			break
 		}
 		a := auxIntToUint8(v_1.AuxInt)
@@ -46832,19 +47649,19 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSLLWMasked256constMerging)
+		v.reset(OpAMD64VPROLQMasked256Merging)
 		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VSCALEFPS256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPROLVD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPROLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VSCALEFPS256 {
+		if v_1.Op != OpAMD64VPROLVD256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46853,99 +47670,100 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VSCALEFPSMasked256Merging)
+		v.reset(OpAMD64VPROLVDMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSLLQ256const [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPROLVQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask))
+	// result: (VPROLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSLLQ256const {
+		if v_1.Op != OpAMD64VPROLVQ256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSLLQMasked256constMerging)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPROLVQMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMINSB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPRORD256 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPRORDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINSB256 {
+		if v_1.Op != OpAMD64VPRORD256 {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINSBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPRORDMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPABSQ256 x) mask)
+	// match: (VPBLENDVB256 dst (VPRORQ256 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPABSQMasked256Merging dst x (VPMOVVec64x4ToM mask))
+	// result: (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPABSQ256 {
+		if v_1.Op != OpAMD64VPRORQ256 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPABSQMasked256Merging)
+		v.reset(OpAMD64VPRORQMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPRORVD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM mask))
+	// result: (VPRORVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHUFHW256 {
+		if v_1.Op != OpAMD64VPRORVD256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHUFHWMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPRORVDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSUBB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPRORVQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPRORVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBB256 {
+		if v_1.Op != OpAMD64VPRORVQ256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -46954,238 +47772,254 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPRORVQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VMAXPS256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSHLDD256 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMAXPSMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPSHLDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMAXPS256 {
+		if v_1.Op != OpAMD64VPSHLDD256 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMAXPSMasked256Merging)
+		v.reset(OpAMD64VPSHLDDMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMAXSD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMAXSDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMAXSD256 {
+		if v_1.Op != OpAMD64VPSHLDQ256 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXSDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHLDQMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VMULPD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSHLDW256 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMULPDMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPSHLDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMULPD256 {
+		if v_1.Op != OpAMD64VPSHLDW256 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMULPDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHLDWMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VDIVPS256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VDIVPSMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VDIVPS256 {
+		if v_1.Op != OpAMD64VPSHRDD256 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VDIVPSMasked256Merging)
+		v.reset(OpAMD64VPSHRDDMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMAXSQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMAXSQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMAXSQ256 {
+		if v_1.Op != OpAMD64VPSHRDQ256 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXSQMasked256Merging)
+		v.reset(OpAMD64VPSHRDQMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VMINPD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VMINPDMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VMINPD256 {
+		if v_1.Op != OpAMD64VPSHRDW256 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VMINPDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHRDWMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSHLDQ256 [a] x y) mask)
+	// match: (VPBLENDVB256 dst (VPSHUFB256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHLDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask))
+	// result: (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHLDQ256 {
+		if v_1.Op != OpAMD64VPSHUFB256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHLDQMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHUFBMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VSCALEFPD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSHUFD256 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPSHUFDMasked256Merging dst [a] x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VSCALEFPD256 {
+		if v_1.Op != OpAMD64VPSHUFD256 {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VSCALEFPDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHUFDMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVSWB128 x) mask)
+	// match: (VPBLENDVB256 dst (VPSHUFHW256 [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVSWBMasked128Merging dst x (VPMOVVec16x16ToM mask))
+	// result: (VPSHUFHWMasked256Merging dst [a] x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVSWB128 {
+		if v_1.Op != OpAMD64VPSHUFHW256 {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVSWBMasked128Merging)
+		v.reset(OpAMD64VPSHUFHWMasked256Merging)
+		v.AuxInt = uint8ToAuxInt(a)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMINSQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSLLD256const [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINSQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINSQ256 {
+		if v_1.Op != OpAMD64VPSLLD256const {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINSQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPSLLDMasked256constMerging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPABSD256 x) mask)
+	// match: (VPBLENDVB256 dst (VPSLLQ256const [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPABSDMasked256Merging dst x (VPMOVVec32x8ToM mask))
+	// result: (VPSLLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPABSD256 {
+		if v_1.Op != OpAMD64VPSLLQ256const {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPABSDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPSLLQMasked256constMerging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMINUW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSLLVD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINUWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPSLLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINUW256 {
+		if v_1.Op != OpAMD64VPSLLVD256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -47194,121 +48028,121 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINUWMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPSLLVDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSHRDW256 [a] x y) mask)
+	// match: (VPBLENDVB256 dst (VPSLLVQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHRDWMasked256Merging dst [a] x y (VPMOVVec16x16ToM mask))
+	// result: (VPSLLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHRDW256 {
+		if v_1.Op != OpAMD64VPSLLVQ256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHRDWMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPSLLVQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMOVZXBW512 x) mask)
+	// match: (VPBLENDVB256 dst (VPSLLVW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMOVZXBWMasked512Merging dst x (VPMOVVec8x32ToM mask))
+	// result: (VPSLLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMOVZXBW512 {
+		if v_1.Op != OpAMD64VPSLLVW256 {
 			break
 		}
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMOVZXBWMasked512Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPSLLVWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMAXUD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSLLW256const [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMAXUDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
+	// result: (VPSLLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMAXUD256 {
+		if v_1.Op != OpAMD64VPSLLW256const {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXUDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPSLLWMasked256constMerging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMAXSB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMAXSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMAXSB256 {
+		if v_1.Op != OpAMD64VPSRAD256const {
 			break
 		}
-		y := v_1.Args[1]
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXSBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPSRADMasked256constMerging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSHRDQ256 [a] x y) mask)
+	// match: (VPBLENDVB256 dst (VPSRAQ256const [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHRDQMasked256Merging dst [a] x y (VPMOVVec64x4ToM mask))
+	// result: (VPSRAQMasked256constMerging dst [a] x (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHRDQ256 {
+		if v_1.Op != OpAMD64VPSRAQ256const {
 			break
 		}
 		a := auxIntToUint8(v_1.AuxInt)
-		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHRDQMasked256Merging)
+		v.reset(OpAMD64VPSRAQMasked256constMerging)
 		v.AuxInt = uint8ToAuxInt(a)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg4(dst, x, y, v0)
+		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMADDUBSW256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSRAVD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMADDUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
+	// result: (VPSRAVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMADDUBSW256 {
+		if v_1.Op != OpAMD64VPSRAVD256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -47317,39 +48151,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMADDUBSWMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPSRAVDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSLLD256const [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPSRAVQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSLLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask))
+	// result: (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSLLD256const {
+		if v_1.Op != OpAMD64VPSRAVQ256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSLLDMasked256constMerging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VPSRAVQMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMINUQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSRAVW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMINUQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMINUQ256 {
+		if v_1.Op != OpAMD64VPSRAVW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -47358,59 +48191,59 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMINUQMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPSRAVWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VRCP14PD256 x) mask)
+	// match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM mask))
+	// result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VRCP14PD256 {
+		if v_1.Op != OpAMD64VPSRAW256const {
 			break
 		}
+		a := auxIntToUint8(v_1.AuxInt)
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VRCP14PDMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPSRAWMasked256constMerging)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(dst, x, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSHRDD256 [a] x y) mask)
+	// match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSHRDDMasked256Merging dst [a] x y (VPMOVVec32x8ToM mask))
+	// result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSHRDD256 {
+		if v_1.Op != OpAMD64VPSRLVD256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
 		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSHRDDMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSRLVDMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPADDQ256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSRLVQ256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPADDQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
+	// result: (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPADDQ256 {
+		if v_1.Op != OpAMD64VPSRLVQ256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -47419,18 +48252,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPADDQMasked256Merging)
+		v.reset(OpAMD64VPSRLVQMasked256Merging)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPMAXUB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSRLVW256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPMAXUBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPMAXUB256 {
+		if v_1.Op != OpAMD64VPSRLVW256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -47439,39 +48272,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPMAXUBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VPSRLVWMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPRORQ256 [a] x) mask)
+	// match: (VPBLENDVB256 dst (VPSUBB256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPRORQMasked256Merging dst [a] x (VPMOVVec64x4ToM mask))
+	// result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPRORQ256 {
+		if v_1.Op != OpAMD64VPSUBB256 {
 			break
 		}
-		a := auxIntToUint8(v_1.AuxInt)
+		y := v_1.Args[1]
 		x := v_1.Args[0]
 		mask := v_2
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPRORQMasked256Merging)
-		v.AuxInt = uint8ToAuxInt(a)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v.reset(OpAMD64VPSUBBMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VADDPD256 x y) mask)
+	// match: (VPBLENDVB256 dst (VPSUBD256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result:
(VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VADDPD256 { + if v_1.Op != OpAMD64VPSUBD256 { break } y := v_1.Args[1] @@ -47480,18 +48312,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VADDPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSUBDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSHUFB256 x y) mask) + // match: (VPBLENDVB256 dst (VPSUBQ256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSHUFBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) + // result: (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSHUFB256 { + if v_1.Op != OpAMD64VPSUBQ256 { break } y := v_1.Args[1] @@ -47500,39 +48332,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSHUFBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VPSUBQMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAD256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPSUBSB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRADMasked256constMerging dst [a] x (VPMOVVec32x8ToM mask)) + // result: (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAD256const { + if v_1.Op != OpAMD64VPSUBSB256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRADMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBW256 x y) mask) + // match: (VPBLENDVB256 dst (VPSUBSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // result: (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBW256 { + if v_1.Op != OpAMD64VPSUBSW256 { break } y := v_1.Args[1] @@ -47541,58 +48372,58 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBWMasked256Merging) + v.reset(OpAMD64VPSUBSWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAW256const [a] x) mask) + // match: (VPBLENDVB256 dst (VPSUBUSB256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM mask)) + // result: (VPSUBUSBMasked256Merging dst x y (VPMOVVec8x32ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAW256const { + if v_1.Op != OpAMD64VPSUBUSB256 { break } - a := auxIntToUint8(v_1.AuxInt) + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if 
!(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAWMasked256constMerging) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VPSUBUSBMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPABSW256 x) mask) + // match: (VPBLENDVB256 dst (VPSUBUSW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSWMasked256Merging dst x (VPMOVVec16x16ToM mask)) + // result: (VPSUBUSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSW256 { + if v_1.Op != OpAMD64VPSUBUSW256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSWMasked256Merging) + v.reset(OpAMD64VPSUBUSWMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPACKSSDW256 x y) mask) + // match: (VPBLENDVB256 dst (VPSUBW256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPACKSSDWMasked256Merging dst x y (VPMOVVec32x8ToM mask)) + // result: (VPSUBWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPACKSSDW256 { + if v_1.Op != OpAMD64VPSUBW256 { break } y := v_1.Args[1] @@ -47601,18 +48432,18 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPACKSSDWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VPSUBWMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVSQD128 x) mask) + // match: (VPBLENDVB256 dst (VRCP14PD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVSQDMasked128Merging dst x (VPMOVVec64x4ToM mask)) + // result: (VRCP14PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVSQD128 { + if v_1.Op != OpAMD64VRCP14PD256 { break } x := v_1.Args[0] @@ -47620,116 +48451,121 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVSQDMasked128Merging) + v.reset(OpAMD64VRCP14PDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPLZCNTD256 x) mask) + // match: (VPBLENDVB256 dst (VREDUCEPD256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPLZCNTDMasked256Merging dst x (VPMOVVec32x8ToM mask)) + // result: (VREDUCEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPLZCNTD256 { + if v_1.Op != OpAMD64VREDUCEPD256 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPLZCNTDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v.reset(OpAMD64VREDUCEPDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 
dst (VMAXPD256 x y) mask) + // match: (VPBLENDVB256 dst (VREDUCEPS256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VMAXPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // result: (VREDUCEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VMAXPD256 { + if v_1.Op != OpAMD64VREDUCEPS256 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VMAXPDMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VREDUCEPSMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPAVGW256 x y) mask) + // match: (VPBLENDVB256 dst (VRNDSCALEPD256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPAVGWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // result: (VRNDSCALEPDMasked256Merging dst [a] x (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPAVGW256 { + if v_1.Op != OpAMD64VRNDSCALEPD256 { break } - y := v_1.Args[1] + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPAVGWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VRNDSCALEPDMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPOPCNTQ256 x) mask) + // match: (VPBLENDVB256 dst (VRNDSCALEPS256 [a] x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTQMasked256Merging dst x (VPMOVVec64x4ToM mask)) + // result: (VRNDSCALEPSMasked256Merging dst [a] x (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTQ256 { + if v_1.Op != OpAMD64VRNDSCALEPS256 { break } + a := auxIntToUint8(v_1.AuxInt) x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTQMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VRNDSCALEPSMasked256Merging) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPSUBSW256 x y) mask) + // match: (VPBLENDVB256 dst (VRSQRT14PD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSUBSWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // result: (VRSQRT14PDMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSUBSW256 { + if v_1.Op != OpAMD64VRSQRT14PD256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSUBSWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VRSQRT14PDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMAXUW256 x y) mask) + // match: (VPBLENDVB256 dst (VSCALEFPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: 
(VPMAXUWMasked256Merging dst x y (VPMOVVec16x16ToM mask)) + // result: (VSCALEFPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMAXUW256 { + if v_1.Op != OpAMD64VSCALEFPD256 { break } y := v_1.Args[1] @@ -47738,57 +48574,57 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMAXUWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VSCALEFPDMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPOPCNTW256 x) mask) + // match: (VPBLENDVB256 dst (VSCALEFPS256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPOPCNTWMasked256Merging dst x (VPMOVVec16x16ToM mask)) + // result: (VSCALEFPSMasked256Merging dst x y (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPOPCNTW256 { + if v_1.Op != OpAMD64VSCALEFPS256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPOPCNTWMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v.reset(OpAMD64VSCALEFPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg3(dst, x, v0) + v.AddArg4(dst, x, y, v0) return true } - // match: (VPBLENDVB256 dst (VPSRAVQ256 x y) mask) + // match: (VPBLENDVB256 dst (VSQRTPD256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM mask)) + // result: (VSQRTPDMasked256Merging dst x (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPSRAVQ256 { + if v_1.Op != OpAMD64VSQRTPD256 { break } - y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPSRAVQMasked256Merging) + v.reset(OpAMD64VSQRTPDMasked256Merging) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg4(dst, x, y, v0) + v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPABSB256 x) mask) + // match: (VPBLENDVB256 dst (VSQRTPS256 x) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPABSBMasked256Merging dst x (VPMOVVec8x32ToM mask)) + // result: (VSQRTPSMasked256Merging dst x (VPMOVVec32x8ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPABSB256 { + if v_1.Op != OpAMD64VSQRTPS256 { break } x := v_1.Args[0] @@ -47796,37 +48632,38 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool { if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPABSBMasked256Merging) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask) + v.reset(OpAMD64VSQRTPSMasked256Merging) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(dst, x, v0) return true } - // match: (VPBLENDVB256 dst (VPMOVDW128 x) mask) + // match: (VPBLENDVB256 dst (VSUBPD256 x y) mask) // cond: v.Block.CPUfeatures.hasFeature(CPUavx512) - // result: (VPMOVDWMasked128Merging dst x (VPMOVVec32x8ToM mask)) + // result: (VSUBPDMasked256Merging dst x y (VPMOVVec64x4ToM mask)) for { dst := v_0 - if v_1.Op != OpAMD64VPMOVDW128 { + if v_1.Op != OpAMD64VSUBPD256 { break } + y := v_1.Args[1] x := v_1.Args[0] mask := v_2 if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) { break } - v.reset(OpAMD64VPMOVDWMasked128Merging) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v.reset(OpAMD64VSUBPDMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg3(dst, x, v0)
+		v.AddArg4(dst, x, y, v0)
 		return true
 	}
-	// match: (VPBLENDVB256 dst (VPSUBSB256 x y) mask)
+	// match: (VPBLENDVB256 dst (VSUBPS256 x y) mask)
 	// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
-	// result: (VPSUBSBMasked256Merging dst x y (VPMOVVec8x32ToM mask))
+	// result: (VSUBPSMasked256Merging dst x y (VPMOVVec32x8ToM mask))
 	for {
 		dst := v_0
-		if v_1.Op != OpAMD64VPSUBSB256 {
+		if v_1.Op != OpAMD64VSUBPS256 {
 			break
 		}
 		y := v_1.Args[1]
@@ -47835,8 +48672,8 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
 		if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
 			break
 		}
-		v.reset(OpAMD64VPSUBSBMasked256Merging)
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+		v.reset(OpAMD64VSUBPSMasked256Merging)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg4(dst, x, y, v0)
 		return true
diff --git a/src/simd/_gen/simdgen/gen_simdrules.go b/src/simd/_gen/simdgen/gen_simdrules.go
index 8dd1707da9..19393add71 100644
--- a/src/simd/_gen/simdgen/gen_simdrules.go
+++ b/src/simd/_gen/simdgen/gen_simdrules.go
@@ -318,13 +318,25 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
 		case 128, 256:
 			// VPBLENDVB cases.
 			noMaskName := machineOpName(NoMask, gOp)
-			maskedMergeOpts[noMaskName] = fmt.Sprintf("(VPBLENDVB%d dst (%s %s) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (%sMerging dst %s (VPMOVVec%dx%dToM mask))\n",
+			ruleExisting, ok := maskedMergeOpts[noMaskName]
+			rule := fmt.Sprintf("(VPBLENDVB%d dst (%s %s) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (%sMerging dst %s (VPMOVVec%dx%dToM mask))\n",
 				*maskElem.Bits, noMaskName, data.Args, data.Asm, data.Args, *maskElem.ElemBits, *maskElem.Lanes)
+			if ok && ruleExisting != rule {
+				panic("multiple masked merge rules for one op")
+			} else {
+				maskedMergeOpts[noMaskName] = rule
+			}
 		case 512:
 			// VPBLENDM[BWDQ] cases.
 			noMaskName := machineOpName(NoMask, gOp)
-			maskedMergeOpts[noMaskName] = fmt.Sprintf("(VPBLENDM%sMasked%d dst (%s %s) mask) => (%sMerging dst %s mask)\n",
+			ruleExisting, ok := maskedMergeOpts[noMaskName]
+			rule := fmt.Sprintf("(VPBLENDM%sMasked%d dst (%s %s) mask) => (%sMerging dst %s mask)\n",
 				s2n[*maskElem.ElemBits], *maskElem.Bits, noMaskName, data.Args, data.Asm, data.Args)
+			if ok && ruleExisting != rule {
+				panic("multiple masked merge rules for one op")
+			} else {
+				maskedMergeOpts[noMaskName] = rule
+			}
 		}
 	}
 
@@ -362,10 +374,15 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
 		}
 	}
 
+	maskedMergeOptsRules := []string{}
 	for asm, rule := range maskedMergeOpts {
 		if !asmCheck[asm] {
 			continue
 		}
+		maskedMergeOptsRules = append(maskedMergeOptsRules, rule)
+	}
+	slices.Sort(maskedMergeOptsRules)
+	for _, rule := range maskedMergeOptsRules {
 		buffer.WriteString(rule)
 	}
 
diff --git a/src/simd/_gen/simdgen/godefs.go b/src/simd/_gen/simdgen/godefs.go
index f42251c5c3..7d3943b4b8 100644
--- a/src/simd/_gen/simdgen/godefs.go
+++ b/src/simd/_gen/simdgen/godefs.go
@@ -133,6 +133,25 @@ func (o *Operation) VectorWidth() int {
 	panic(fmt.Errorf("Figure out what the vector width is for %v and implement it", *o))
 }
 
+// Right now simdgen computes the machine op name for most instructions
+// as $Name$OutputSize; under that naming, some instructions are "overloaded".
+// For example:
+// (Uint16x8) ConvertToInt8
+// (Uint16x16) ConvertToInt8
+// are both VPMOVWB128.
+// To make them distinguishable, we append the input size to the name as well.
+// TODO: document them well in the generated code.
+var demotingConvertOps = map[string]bool{
+	"VPMOVQD128": true, "VPMOVSQD128": true, "VPMOVUSQD128": true, "VPMOVQW128": true, "VPMOVSQW128": true,
+	"VPMOVUSQW128": true, "VPMOVDW128": true, "VPMOVSDW128": true, "VPMOVUSDW128": true, "VPMOVQB128": true,
+	"VPMOVSQB128": true, "VPMOVUSQB128": true, "VPMOVDB128": true, "VPMOVSDB128": true, "VPMOVUSDB128": true,
+	"VPMOVWB128": true, "VPMOVSWB128": true, "VPMOVUSWB128": true,
+	"VPMOVQDMasked128": true, "VPMOVSQDMasked128": true, "VPMOVUSQDMasked128": true, "VPMOVQWMasked128": true, "VPMOVSQWMasked128": true,
+	"VPMOVUSQWMasked128": true, "VPMOVDWMasked128": true, "VPMOVSDWMasked128": true, "VPMOVUSDWMasked128": true, "VPMOVQBMasked128": true,
+	"VPMOVSQBMasked128": true, "VPMOVUSQBMasked128": true, "VPMOVDBMasked128": true, "VPMOVSDBMasked128": true, "VPMOVUSDBMasked128": true,
+	"VPMOVWBMasked128": true, "VPMOVSWBMasked128": true, "VPMOVUSWBMasked128": true,
+}
+
 func machineOpName(maskType maskShape, gOp Operation) string {
 	asm := gOp.Asm
 	if maskType == OneMask {
@@ -142,6 +161,11 @@ func machineOpName(maskType maskShape, gOp Operation) string {
 	if gOp.SSAVariant != nil {
 		asm += *gOp.SSAVariant
 	}
+	if demotingConvertOps[asm] {
+		// Append the size of the source operand as well.
+		// TODO: should be "%sto%d".
+		asm = fmt.Sprintf("%s_%d", asm, *gOp.In[0].Bits)
+	}
 	return asm
 }
-- 
2.52.0
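Background on the ordering part of this CL: Go randomizes map iteration order, so ranging over maskedMergeOpts and writing each rule straight into the buffer could emit the rules in a different order on every run of the generator, which is the unstable output this change fixes. Below is a minimal standalone sketch of the collect-sort-emit pattern; the map contents are made up for illustration and are not taken from the CL.

package main

import (
	"fmt"
	"slices"
)

func main() {
	// Stand-in for maskedMergeOpts: machine op name -> rewrite rule.
	// Rule bodies are abbreviated; the real ones are full SSA rewrites.
	rules := map[string]string{
		"VPSUBB256":  "(VPBLENDVB256 dst (VPSUBB256 x y) mask) => ...",
		"VPADDQ256":  "(VPBLENDVB256 dst (VPADDQ256 x y) mask) => ...",
		"VSQRTPD256": "(VPBLENDVB256 dst (VSQRTPD256 x) mask) => ...",
	}

	// Ranging over the map and printing directly would emit the rules in
	// a different (randomized) order on each run, so a generated file
	// would not be byte-for-byte reproducible.
	sorted := make([]string, 0, len(rules))
	for _, rule := range rules {
		sorted = append(sorted, rule)
	}
	slices.Sort(sorted) // lexical sort gives a deterministic emission order
	for _, rule := range sorted {
		fmt.Println(rule)
	}
}

Sorting the collected rules, rather than the map keys, is enough here because each rule string embeds its op name, so the order is stable either way.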
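Likewise, for the godefs.go change: appending the input width gives the two overloads of a demoting conversion distinct SSA op names. A toy sketch of the scheme follows; opName is a simplified, hypothetical stand-in for machineOpName, which also handles mask shapes and SSA variants.

package main

import "fmt"

// demoting lists conversions whose $Name$OutputSize name is ambiguous;
// a one-entry excerpt mirroring the idea of demotingConvertOps.
var demoting = map[string]bool{
	"VPMOVWB128": true,
}

// opName appends the input width for demoting conversions so that
// operations sharing an output shape get distinct names.
func opName(asm string, inBits int) string {
	if demoting[asm] {
		return fmt.Sprintf("%s_%d", asm, inBits)
	}
	return asm
}

func main() {
	fmt.Println(opName("VPMOVWB128", 128)) // VPMOVWB128_128: from Uint16x8
	fmt.Println(opName("VPMOVWB128", 256)) // VPMOVWB128_256: from Uint16x16
	fmt.Println(opName("VPADDQ256", 256))  // unchanged: not a demoting convert
}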